From 53661ded2460b414644532de6b99bd87f71987e9 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 7 Jul 2022 15:08:01 -0400 Subject: scsi: qla2xxx: Disable ATIO interrupt coalesce for quad port ISP27XX This partially reverts commit d2b292c3f6fd ("scsi: qla2xxx: Enable ATIO interrupt handshake for ISP27XX") For some workloads where the host sends a batch of commands and then pauses, ATIO interrupt coalesce can cause some incoming ATIO entries to be ignored for extended periods of time, resulting in slow performance, timeouts, and aborted commands. Disable interrupt coalesce and re-enable the dedicated ATIO MSI-X interrupt. Link: https://lore.kernel.org/r/97dcf365-89ff-014d-a3e5-1404c6af511c@cybernetics.com Reviewed-by: Himanshu Madhani Reviewed-by: Nilesh Javali Signed-off-by: Tony Battersby Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2b2f68288375..62666df1a59e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6935,14 +6935,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) if (ha->flags.msix_enabled) { if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - if (IS_QLA2071(ha)) { - /* 4 ports Baker: Enable Interrupt Handshake */ - icb->msix_atio = 0; - icb->firmware_options_2 |= cpu_to_le32(BIT_26); - } else { - icb->msix_atio = cpu_to_le16(msix->entry); - icb->firmware_options_2 &= cpu_to_le32(~BIT_26); - } + icb->msix_atio = cpu_to_le16(msix->entry); + icb->firmware_options_2 &= cpu_to_le32(~BIT_26); ql_dbg(ql_dbg_init, vha, 0xf072, "Registering ICB vector 0x%x for atio que.\n", msix->entry); -- cgit v1.2.3 From 54249306e2776774ccb827969e62d34570f991db Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Fri, 29 Jul 2022 14:41:10 -0700 Subject: scsi: core: Allow the ALUA transitioning state enough time The error path for the SCSI check condition of not ready, target in ALUA state transition, will result in the failure of that path after the retries are exhausted. In most cases that is well ahead of the transition timeout established in the SCSI ALUA device handler. Instead, reprep the command and re-add it to the queue after a 1 second delay. This will allow the handler to take care of the timeout and only fail the path if the target has exceeded the transition expiry timeout (default 60 seconds). If the expiry timeout is exceeded, the handler will change the path state from transitioning to standby leading to a path failure eliminating the potential of this re-prep to continue endlessly. In most cases the target will exit the transitioning state well before the expiry timeout but after the retries are exhausted as mentioned. Additionally remove the scsi_io_completion_reprep() function which provides little value. Link: https://lore.kernel.org/r/20220729214110.58576-1-brian@purestorage.com Reviewed-by: Martin Wilck Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2aca0a838ca5..784661035590 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -111,7 +111,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason) } } -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); @@ -121,7 +121,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) } else { WARN_ON_ONCE(true); } - blk_mq_requeue_request(rq, true); + + if (msecs) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(rq->q, msecs); + } else + blk_mq_requeue_request(rq, true); } /** @@ -651,14 +656,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq) return bytes; } -/* Helper for scsi_io_completion() when "reprep" action required. */ -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, - struct request_queue *q) -{ - /* A new command will be prepared and issued. */ - scsi_mq_requeue_cmd(cmd); -} - static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) { struct request *req = scsi_cmd_to_rq(cmd); @@ -676,14 +673,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) return false; } +/* + * When ALUA transition state is returned, reprep the cmd to + * use the ALUA handler's transition timeout. Delay the reprep + * 1 sec to avoid aggressive retries of the target in that + * state. + */ +#define ALUA_TRANSITION_REPREP_DELAY 1000 + /* Helper for scsi_io_completion() when special action required. */ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) { - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); int level = 0; - enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY, - ACTION_DELAYED_RETRY} action; + enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, + ACTION_RETRY, ACTION_DELAYED_RETRY} action; struct scsi_sense_hdr sshdr; bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ @@ -772,8 +776,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ - blk_stat = BLK_STS_TRANSPORT; - fallthrough; + action = ACTION_DELAYED_REPREP; + break; default: action = ACTION_FAIL; break; @@ -832,7 +836,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) return; fallthrough; case ACTION_REPREP: - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); + break; + case ACTION_DELAYED_REPREP: + scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); break; case ACTION_RETRY: /* Retry the same command immediately */ @@ -926,7 +933,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * - * a) We can call scsi_io_completion_reprep(). The request will be + * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for @@ -942,7 +949,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; @@ -979,7 +985,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * request just queue the command up again. */ if (likely(result == 0)) - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); else scsi_io_completion_action(cmd, result); } -- cgit v1.2.3 From 6d17a112e9a63ff6a5edffd1676b99e0ffbcd269 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Tue, 2 Aug 2022 10:42:31 +0900 Subject: scsi: ufs: core: Enable link lost interrupt Link lost is treated as fatal error with commit c99b9b230149 ("scsi: ufs: Treat link loss as fatal error"), but the event isn't registered as interrupt source. Enable it. Link: https://lore.kernel.org/r/1659404551-160958-1-git-send-email-kwmad.kim@samsung.com Fixes: c99b9b230149 ("scsi: ufs: Treat link loss as fatal error") Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index f81aa95ffbc4..f525566a0864 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK) -#define UFSHCD_ERROR_MASK (UIC_ERROR |\ - DEVICE_FATAL_ERROR |\ - CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) +#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ -- cgit v1.2.3 From 8c499e49240bd93628368c3588975cfb94169b8b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:18:49 +0800 Subject: scsi: megaraid_sas: Fix double kfree() When allocating log_to_span fails, kfree(instance->ctrl_context) is called twice. Remove redundant call. Link: https://lore.kernel.org/r/1659424729-46502-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e48d4261d0bc..09c5fe37754c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -5310,7 +5310,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance) if (!fusion->log_to_span) { dev_err(&instance->pdev->dev, "Failed from %s %d\n", __func__, __LINE__); - kfree(instance->ctrl_context); return -ENOMEM; } } -- cgit v1.2.3 From 7dd6f4af9482c319fa829583799e63e38967177d Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:19:00 +0800 Subject: scsi: megaraid_sas: Remove unnecessary kfree() When alloc ctrl mem fails, the reply_map will subsequently be freed in megasas_free_ctrl_mem(). No need to free it in megasas_alloc_ctrl_mem(). Link: https://lore.kernel.org/r/1659424740-46918-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index fb28c8178828..6b3d54c04baa 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -7150,22 +7150,18 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) - goto fail; + return -ENOMEM; break; case AERO_SERIES: case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) - goto fail; + return -ENOMEM; break; } return 0; - fail: - kfree(instance->reply_map); - instance->reply_map = NULL; - return -ENOMEM; } /* -- cgit v1.2.3 From 37dd4ab1ff8cb843c69835dcaf7bc719a2bf2e0c Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Thu, 11 Aug 2022 21:40:53 +0530 Subject: scsi: ufs: host: ufs-exynos: Make fsd_ufs_drvs static struct fsd_ufs_drvs is not used outside this file, so make it static. This fixes sparse warning: drivers/ufs/host/ufs-exynos.c:1721:28: sparse: sparse: symbol 'fsd_ufs_drvs' was not declared. Should it be static? Link: https://lore.kernel.org/r/20220811161053.54081-1-alim.akhtar@samsung.com Fixes: 216f74e8059a ("scsi: ufs: host: ufs-exynos: Add support for FSD UFS HCI") Reported-by: kernel test robot Reviewed-by: Bart Van Assche Signed-off-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index eced97538082..c3628a8645a5 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1711,7 +1711,7 @@ static struct exynos_ufs_uic_attr fsd_uic_attr = { .pa_dbg_option_suite = 0x2E820183, }; -struct exynos_ufs_drv_data fsd_ufs_drvs = { +static const struct exynos_ufs_drv_data fsd_ufs_drvs = { .uic_attr = &fsd_uic_attr, .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR | -- cgit v1.2.3 From d957e7ffb2c72410bcc1a514153a46719255a5da Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 4 Aug 2022 08:55:34 -0700 Subject: scsi: storvsc: Remove WQ_MEM_RECLAIM from storvsc_error_wq storvsc_error_wq workqueue should not be marked as WQ_MEM_RECLAIM as it doesn't need to make forward progress under memory pressure. Marking this workqueue as WQ_MEM_RECLAIM may cause deadlock while flushing a non-WQ_MEM_RECLAIM workqueue. In the current state it causes the following warning: [ 14.506347] ------------[ cut here ]------------ [ 14.506354] workqueue: WQ_MEM_RECLAIM storvsc_error_wq_0:storvsc_remove_lun is flushing !WQ_MEM_RECLAIM events_freezable_power_:disk_events_workfn [ 14.506360] WARNING: CPU: 0 PID: 8 at <-snip->kernel/workqueue.c:2623 check_flush_dependency+0xb5/0x130 [ 14.506390] CPU: 0 PID: 8 Comm: kworker/u4:0 Not tainted 5.4.0-1086-azure #91~18.04.1-Ubuntu [ 14.506391] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 05/09/2022 [ 14.506393] Workqueue: storvsc_error_wq_0 storvsc_remove_lun [ 14.506395] RIP: 0010:check_flush_dependency+0xb5/0x130 <-snip-> [ 14.506408] Call Trace: [ 14.506412] __flush_work+0xf1/0x1c0 [ 14.506414] __cancel_work_timer+0x12f/0x1b0 [ 14.506417] ? kernfs_put+0xf0/0x190 [ 14.506418] cancel_delayed_work_sync+0x13/0x20 [ 14.506420] disk_block_events+0x78/0x80 [ 14.506421] del_gendisk+0x3d/0x2f0 [ 14.506423] sr_remove+0x28/0x70 [ 14.506427] device_release_driver_internal+0xef/0x1c0 [ 14.506428] device_release_driver+0x12/0x20 [ 14.506429] bus_remove_device+0xe1/0x150 [ 14.506431] device_del+0x167/0x380 [ 14.506432] __scsi_remove_device+0x11d/0x150 [ 14.506433] scsi_remove_device+0x26/0x40 [ 14.506434] storvsc_remove_lun+0x40/0x60 [ 14.506436] process_one_work+0x209/0x400 [ 14.506437] worker_thread+0x34/0x400 [ 14.506439] kthread+0x121/0x140 [ 14.506440] ? process_one_work+0x400/0x400 [ 14.506441] ? kthread_park+0x90/0x90 [ 14.506443] ret_from_fork+0x35/0x40 [ 14.506445] ---[ end trace 2d9633159fdc6ee7 ]--- Link: https://lore.kernel.org/r/1659628534-17539-1-git-send-email-ssengar@linux.microsoft.com Fixes: 436ad9413353 ("scsi: storvsc: Allow only one remove lun work item to be issued per lun") Reviewed-by: Michael Kelley Signed-off-by: Saurabh Sengar Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index fe000da11332..8ced292c4b96 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device, */ host_dev->handle_error_wq = alloc_ordered_workqueue("storvsc_error_wq_%d", - WQ_MEM_RECLAIM, + 0, host->host_no); if (!host_dev->handle_error_wq) { ret = -ENOMEM; -- cgit v1.2.3 From 27e23836ce22a3e5d89712ef832ab72e47ce9f43 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 10 Aug 2022 20:07:10 +0000 Subject: selftests/bpf: Add lru_bug to s390x deny list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lru_bug BPF selftest is failing execution on s390x machines. The failure is due to program attachment failing in turn, similar to a bunch of other tests. Those other tests have already been deny-listed and with this change we do the same for the lru_bug test, adding it to the corresponding file. Fixes: de7b9927105b ("selftests/bpf: Add test for prealloc_lru_pop bug") Signed-off-by: Daniel Müller Acked-by: Mykola Lysenko Link: https://lore.kernel.org/r/20220810200710.1300299-1-deso@posteo.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index e33cab34d22f..db9810611788 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -65,3 +65,4 @@ send_signal # intermittently fails to receive signa select_reuseport # intermittently fails on new s390x setup xdp_synproxy # JIT does not support calling kernel function (kfunc) unpriv_bpf_disabled # fentry +lru_bug # prog 'printk': failed to auto-attach: -524 -- cgit v1.2.3 From 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 28 Apr 2022 16:50:29 +0200 Subject: x86/PAT: Have pat_enabled() properly reflect state when running on Xen After commit ID in the Fixes: tag, pat_enabled() returns false (because of PAT initialization being suppressed in the absence of MTRRs being announced to be available). This has become a problem: the i915 driver now fails to initialize when running PV on Xen (i915_gem_object_pin_map() is where I located the induced failure), and its error handling is flaky enough to (at least sometimes) result in a hung system. Yet even beyond that problem the keying of the use of WC mappings to pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular graphics frame buffer accesses would have been quite a bit less optimal than possible. Arrange for the function to return true in such environments, without undermining the rest of PAT MSR management logic considering PAT to be disabled: specifically, no writes to the PAT MSR should occur. For the new boolean to live in .init.data, init_cache_modes() also needs moving to .init.text (where it could/should have lived already before). [ bp: This is the "small fix" variant for stable. It'll get replaced with a proper PAT and MTRR detection split upstream but that is too involved for a stable backport. - additional touchups to commit msg. Use cpu_feature_enabled(). ] Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()") Signed-off-by: Jan Beulich Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Cc: Cc: Juergen Gross Cc: Lucas De Marchi Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com --- arch/x86/mm/pat/memtype.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index d5ef64ddd35e..66a209f7eb86 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -62,6 +62,7 @@ static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); static bool __read_mostly pat_bp_enabled; static bool __read_mostly pat_cm_initialized; @@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason) static int __init nopat(char *str) { pat_disable("PAT support disabled via boot option."); + pat_force_disabled = true; return 0; } early_param("nopat", nopat); @@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void init_cache_modes(void) +void __init init_cache_modes(void) { u64 pat = 0; @@ -313,6 +315,12 @@ void init_cache_modes(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { + /* + * Clearly PAT is enabled underneath. Allow pat_enabled() to + * reflect this. + */ + pat_bp_enabled = true; } __init_cache_modes(pat); -- cgit v1.2.3 From 58ca14ed98c87cfe0d1408cc65a9745d9e9b7a56 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 12 Aug 2022 13:32:59 +0200 Subject: xsk: Fix corrupted packets for XDP_SHARED_UMEM Fix an issue in XDP_SHARED_UMEM mode together with aligned mode where packets are corrupted for the second and any further sockets bound to the same umem. In other words, this does not affect the first socket bound to the umem. The culprit for this bug is that the initialization of the DMA addresses for the pre-populated xsk buffer pool entries was not performed for any socket but the first one bound to the umem. Only the linear array of DMA addresses was populated. Fix this by populating the DMA addresses in the xsk buffer pool for every socket bound to the same umem. Fixes: 94033cd8e73b8 ("xsk: Optimize for aligned case") Reported-by: Alasdair McWilliam Reported-by: Intrusion Shield Team Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Alasdair McWilliam Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/xdp-newbies/6205E10C-292E-4995-9D10-409649354226@outlook.com/ Link: https://lore.kernel.org/bpf/20220812113259.531-1-magnus.karlsson@gmail.com --- net/xdp/xsk_buff_pool.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index f70112176b7c..a71a8c6edf55 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -379,6 +379,16 @@ static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map) static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map) { + if (!pool->unaligned) { + u32 i; + + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + } + } + pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL); if (!pool->dma_pages) return -ENOMEM; @@ -428,12 +438,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); - else - for (i = 0; i < pool->heads_cnt; i++) { - struct xdp_buff_xsk *xskb = &pool->heads[i]; - - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); - } err = xp_init_dma_info(pool, dma_map); if (err) { -- cgit v1.2.3 From a921be53b46c393d8d594a62a44f418c97e5504b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 10 Aug 2022 12:07:31 +0200 Subject: thermal/core: Add missing EXPORT_SYMBOL_GPL The function thermal_zone_device_register_with_trips() is not exported for modules. Add the missing EXPORT_SYMBOL_GPL(). Fixes: fae11de507f0e ("thermal/core: Add thermal_trip in thermal_zone") Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220810100731.749317-1-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 6a5d0ae5d7a4..50d50cec7774 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1329,6 +1329,7 @@ free_tz: kfree(tz); return ERR_PTR(result); } +EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips); struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask, void *devdata, struct thermal_zone_device_ops *ops, -- cgit v1.2.3 From 8c596324232d22e19f8df59ba03410b9b5b0f3d7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 9 Aug 2022 10:56:28 +0200 Subject: dt-bindings: thermal: Fix missing required property When the thermal zone description was converted to yaml schema, the required 'trips' property was forgotten. The initial text bindings was describing: " [ ... ] * Thermal zone nodes The thermal zone node is the node containing all the required info for describing a thermal zone, including its cooling device bindings. The thermal zone node must contain, apart from its own properties, one sub-node containing trip nodes and one sub-node containing all the zone cooling maps. Required properties: - polling-delay: The maximum number of milliseconds to wait between polls Type: unsigned when checking this thermal zone. Size: one cell - polling-delay-passive: The maximum number of milliseconds to wait Type: unsigned between polls when performing passive cooling. Size: one cell - thermal-sensors: A list of thermal sensor phandles and sensor specifier Type: list of used while monitoring the thermal zone. phandles + sensor specifier - trips: A sub-node which is a container of only trip point nodes Type: sub-node required to describe the thermal zone. Optional property: - cooling-maps: A sub-node which is a container of only cooling device Type: sub-node map nodes, used to describe the relation between trips and cooling devices. [ ... ] " Now the schema describes: " [ ... ] required: - polling-delay - polling-delay-passive - thermal-sensors [ ... ] " Add the missing 'trips' property in the required properties. Fixed: 1202a442a31fd ("dt-bindings: thermal: Add yaml bindings for thermal zones") Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220809085629.509116-3-daniel.lezcano@linaro.org --- Documentation/devicetree/bindings/thermal/thermal-zones.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 2d34f3ccb257..8d2c6d74b605 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -214,6 +214,7 @@ patternProperties: - polling-delay - polling-delay-passive - thermal-sensors + - trips additionalProperties: false -- cgit v1.2.3 From 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 16 Aug 2022 09:11:37 +0200 Subject: x86/entry: Fix entry_INT80_compat for Xen PV guests Commit c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") missed one use case of SWAPGS in entry_INT80_compat(). Removing of the SWAPGS macro led to asm just using "swapgs", as it is accepting instructions in capital letters, too. This in turn leads to splats in Xen PV guests like: [ 36.145223] general protection fault, maybe for address 0x2d: 0000 [#1] PREEMPT SMP NOPTI [ 36.145794] CPU: 2 PID: 1847 Comm: ld-linux.so.2 Not tainted 5.19.1-1-default #1 \ openSUSE Tumbleweed f3b44bfb672cdb9f235aff53b57724eba8b9411b [ 36.146608] Hardware name: HP ProLiant ML350p Gen8, BIOS P72 11/14/2013 [ 36.148126] RIP: e030:entry_INT80_compat+0x3/0xa3 Fix that by open coding this single instance of the SWAPGS macro. Fixes: c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Jan Beulich Cc: # 5.19 Link: https://lore.kernel.org/r/20220816071137.4893-1-jgross@suse.com --- arch/x86/entry/entry_64_compat.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 682338e7e2a3..4dd19819053a 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat) * Interrupts are off on entry. */ ASM_CLAC /* Do this early to minimize exposure */ - SWAPGS + ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV /* * User tracing code (ptrace or signal handlers) might assume that -- cgit v1.2.3 From 5e1e087457c94ad7fafbe1cf6f774c6999ee29d4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 9 Aug 2022 12:38:48 +0800 Subject: arm64: Fix match_list for erratum 1286807 on Arm Cortex-A76 Since commit 51f559d66527 ("arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs"), we failed to detect erratum 1286807 on Cortex-A76 because its entry in arm64_repeat_tlbi_list[] was accidently corrupted by this commit. Fix this issue by creating a separate entry for Kryo4xx Gold. Fixes: 51f559d66527 ("arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs") Cc: Shreyas K K Signed-off-by: Zenghui Yu Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220809043848.969-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7e6289e709fc..0f7e9087d900 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, + { /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, -- cgit v1.2.3 From ff5900092227ade3e31fe25f97faf406cde902e6 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 21 Jul 2022 12:04:33 +0200 Subject: arm64: adjust KASLR relocation after ARCH_RANDOM removal Commit aacd149b6238 ("arm64: head: avoid relocating the kernel twice for KASLR") adds the new file arch/arm64/kernel/pi/kaslr_early.c with a small code part guarded by '#ifdef CONFIG_ARCH_RANDOM'. Concurrently, commit 9592eef7c16e ("random: remove CONFIG_ARCH_RANDOM") removes the config CONFIG_ARCH_RANDOM and turns all '#ifdef CONFIG_ARCH_RANDOM' code parts into unconditional code parts, which is generally safe to do. Remove a needless ifdef guard after the ARCH_RANDOM removal. Signed-off-by: Lukas Bulwahn Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220721100433.18286-1-lukas.bulwahn@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/pi/kaslr_early.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 6c3855e69395..17bff6e399e4 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -94,11 +94,9 @@ asmlinkage u64 kaslr_early_init(void *fdt) seed = get_kaslr_seed(fdt); if (!seed) { -#ifdef CONFIG_ARCH_RANDOM - if (!__early_cpu_has_rndr() || - !__arm64_rndr((unsigned long *)&seed)) -#endif - return 0; + if (!__early_cpu_has_rndr() || + !__arm64_rndr((unsigned long *)&seed)) + return 0; } /* -- cgit v1.2.3 From 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Wed, 17 Aug 2022 15:25:21 +0200 Subject: riscv: dts: microchip: correct L2 cache interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "PolarFire SoC MSS Technical Reference Manual" documents the following PLIC interrupts: 1 - L2 Cache Controller Signals when a metadata correction event occurs 2 - L2 Cache Controller Signals when an uncorrectable metadata event occurs 3 - L2 Cache Controller Signals when a data correction event occurs 4 - L2 Cache Controller Signals when an uncorrectable data event occurs This differs from the SiFive FU540 which only has three L2 cache related interrupts. The sequence in the device tree is defined by an enum: enum {         DIR_CORR = 0,         DATA_CORR,         DATA_UNCORR,         DIR_UNCORR, }; So the correct sequence of the L2 cache interrupts is interrupts = <1>, <3>, <4>, <2>; [Conor] This manifests as an unusable system if the l2-cache driver is enabled, as the wrong interrupt gets cleared & the handler prints errors to the console ad infinitum. Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") CC: stable@vger.kernel.org # 5.15: e35b07a7df9b: riscv: dts: microchip: mpfs: Group tuples in interrupt properties Signed-off-by: Heinrich Schuchardt Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 499c2e63ad35..0a6ad5b9ff8d 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -193,7 +193,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic>; - interrupts = <1>, <2>, <3>; + interrupts = <1>, <3>, <4>, <2>; }; clint: clint@2000000 { -- cgit v1.2.3 From 583585e48d965338e73e1eb383768d16e0922d73 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 9 Aug 2022 17:49:15 +0800 Subject: skmsg: Fix wrong last sg check in sk_msg_recvmsg() Fix one kernel NULL pointer dereference as below: [ 224.462334] Call Trace: [ 224.462394] __tcp_bpf_recvmsg+0xd3/0x380 [ 224.462441] ? sock_has_perm+0x78/0xa0 [ 224.462463] tcp_bpf_recvmsg+0x12e/0x220 [ 224.462494] inet_recvmsg+0x5b/0xd0 [ 224.462534] __sys_recvfrom+0xc8/0x130 [ 224.462574] ? syscall_trace_enter+0x1df/0x2e0 [ 224.462606] ? __do_page_fault+0x2de/0x500 [ 224.462635] __x64_sys_recvfrom+0x24/0x30 [ 224.462660] do_syscall_64+0x5d/0x1d0 [ 224.462709] entry_SYSCALL_64_after_hwframe+0x65/0xca In commit 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()"), we change last sg check to sg_is_last(), but in sockmap redirection case (without stream_parser/stream_verdict/ skb_verdict), we did not mark the end of the scatterlist. Check the sk_msg_alloc, sk_msg_page_add, and bpf_msg_push_data functions, they all do not mark the end of sg. They are expected to use sg.end for end judgment. So the judgment of '(i != msg_rx->sg.end)' is added back here. Fixes: 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20220809094915.150391-1-liujian56@huawei.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f47338d89d5d..5be4485ff07d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -461,7 +461,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (!sg_is_last(sge)); + } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -471,7 +471,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && sg_is_last(sge)) { + if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } -- cgit v1.2.3 From 3024d95a4c521c278a7504ee9e80c57c3a9750e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Aug 2022 23:32:09 +0200 Subject: bpf: Partially revert flexible-array member replacement Partially revert 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") given it breaks BPF UAPI. For example, BPF CI run reveals build breakage under LLVM: [...] CLNG-BPF [test_maps] map_ptr_kern.o CLNG-BPF [test_maps] btf__core_reloc_arrays___diff_arr_val_sz.o CLNG-BPF [test_maps] test_bpf_cookie.o progs/map_ptr_kern.c:314:26: error: field 'trie_key' with variable sized type 'struct bpf_lpm_trie_key' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-variable-sized-type-not-at-end] struct bpf_lpm_trie_key trie_key; ^ CLNG-BPF [test_maps] btf__core_reloc_type_based___diff.o 1 error generated. make: *** [Makefile:521: /tmp/runner/work/bpf/bpf/tools/testing/selftests/bpf/map_ptr_kern.o] Error 1 make: *** Waiting for unfinished jobs.... [...] Typical usage of the bpf_lpm_trie_key is that the struct gets embedded into a user defined key for the LPM BPF map, from the selftest example: struct bpf_lpm_trie_key { <-- UAPI exported struct __u32 prefixlen; __u8 data[]; }; struct lpm_key { <-- BPF program defined struct struct bpf_lpm_trie_key trie_key; __u32 data; }; Undo this for BPF until a different solution can be found. It's the only flexible- array member case in the UAPI header. This was discovered in BPF CI after Dave reported that the include/uapi/linux/bpf.h header was out of sync with tools/include/uapi/linux/bpf.h after 94dfc73e7cf4. And the subsequent sync attempt failed CI. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Reported-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Link: https://lore.kernel.org/bpf/22aebc88-da67-f086-e620-dd4a16e2bc69@iogearbox.net --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..59a217ca2dfd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -79,7 +79,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { -- cgit v1.2.3 From 14b20b784f59bdd95f6f1cfb112c9818bcec4d84 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 16 Aug 2022 20:55:16 +0000 Subject: bpf: Restrict bpf_sys_bpf to CAP_PERFMON The verifier cannot perform sufficient validation of any pointers passed into bpf_attr and treats them as integers rather than pointers. The helper will then read from arbitrary pointers passed into it. Restrict the helper to CAP_PERFMON since the security model in BPF of arbitrary kernel read is CAP_BPF + CAP_PERFMON. Fixes: af2ac3e13e45 ("bpf: Prepare bpf syscall to be used from kernel and user space.") Signed-off-by: YiFei Zhu Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220816205517.682470-1-zhuyifei@google.com --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a4d40d98428a..27760627370d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5197,7 +5197,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return &bpf_sys_bpf_proto; + return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: -- cgit v1.2.3 From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 3 Aug 2022 14:41:32 -0700 Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data Older Intel CPUs that are not in the affected processor list for MMIO Stale Data vulnerabilities currently report "Not affected" in sysfs, which may not be correct. Vulnerability status for these older CPUs is unknown. Add known-not-affected CPUs to the whitelist. Report "unknown" mitigation status for CPUs that are not in blacklist, whitelist and also don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware immunity to MMIO Stale Data vulnerabilities. Mitigation is not deployed when the status is unknown. [ bp: Massage, fixup. ] Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data") Suggested-by: Andrew Cooper Suggested-by: Tony Luck Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com --- .../hw-vuln/processor_mmio_stale_data.rst | 14 ++++++++ arch/x86/include/asm/cpufeatures.h | 5 +-- arch/x86/kernel/cpu/bugs.c | 14 ++++++-- arch/x86/kernel/cpu/common.c | 42 ++++++++++++++-------- 4 files changed, 56 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 9393c50b5afc..c98fd11907cc 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -230,6 +230,20 @@ The possible values in this file are: * - 'Mitigation: Clear CPU buffers' - The processor is vulnerable and the CPU buffer clearing mitigation is enabled. + * - 'Unknown: No mitigations' + - The processor vulnerability status is unknown because it is + out of Servicing period. Mitigation is not attempted. + +Definitions: +------------ + +Servicing period: The process of providing functional and security updates to +Intel processors or platforms, utilizing the Intel Platform Update (IPU) +process or other similar mechanisms. + +End of Servicing Updates (ESU): ESU is the date at which Intel will no +longer provide Servicing, such as through IPU or other similar update +processes. ESU dates will typically be aligned to end of quarter. If the processor is vulnerable then the following information is appended to the above information: diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 235dc85c91c3..ef4775c6db01 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -457,7 +457,8 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 510d85261132..da7c361f47e0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void) u64 ia32_cap; if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || - cpu_mitigations_off()) { + boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || + cpu_mitigations_off()) { mmio_mitigation = MMIO_MITIGATION_OFF; return; } @@ -538,6 +539,8 @@ out: pr_info("TAA: %s\n", taa_strings[taa_mitigation]); if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); + else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + pr_info("MMIO Stale Data: Unknown: No mitigations\n"); } static void __init md_clear_select_mitigation(void) @@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf) static ssize_t mmio_stale_data_show_state(char *buf) { + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return sysfs_emit(buf, "Unknown: No mitigations\n"); + if (mmio_mitigation == MMIO_MITIGATION_OFF) return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); @@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return srbds_show_state(buf); case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_MMIO_UNKNOWN: return mmio_stale_data_show_state(buf); case X86_BUG_RETBLEED: @@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) { - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); + else + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64a73f415f03..3e508f239098 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) -#define NO_EIBRS_PBRSB BIT(9) +#define NO_MMIO BIT(9) +#define NO_EIBRS_PBRSB BIT(10) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ + VULNWL_INTEL(TIGERLAKE, NO_MMIO), + VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(ALDERLAKE, NO_MMIO), + VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), @@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), {} }; @@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. + * + * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, + * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (cpu_matches(cpu_vuln_blacklist, MMIO) && - !arch_cap_mmio_immune(ia32_cap)) - setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (!arch_cap_mmio_immune(ia32_cap)) { + if (cpu_matches(cpu_vuln_blacklist, MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); + } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) -- cgit v1.2.3 From 72e2329e7c9bbe15e7a813670497ec9c6f919af3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jun 2022 14:34:36 +0200 Subject: drm/vc4: hdmi: Depends on CONFIG_PM We already depend on runtime PM to get the power domains and clocks for most of the devices supported by the vc4 driver, so let's just select it to make sure it's there. Link: https://lore.kernel.org/r/20220629123510.1915022-38-maxime@cerno.tech Acked-by: Thomas Zimmermann Tested-by: Stefan Wahren Signed-off-by: Maxime Ripard (cherry picked from commit f1bc386b319e93e56453ae27e9e83817bb1f6f95) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/Kconfig | 1 + drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 061be9a6619d..b0f3117102ca 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -8,6 +8,7 @@ config DRM_VC4 depends on DRM depends on SND && SND_SOC depends on COMMON_CLK + depends on PM select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 592c3b5d03e6..411a87adb0c3 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2855,7 +2855,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } -static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev) +static int vc4_hdmi_runtime_suspend(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); -- cgit v1.2.3 From 258e483a4d5e97a6a8caa74381ddc1f395ac1c71 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jun 2022 14:34:37 +0200 Subject: drm/vc4: hdmi: Rework power up The current code tries to handle the case where CONFIG_PM isn't selected by first calling our runtime_resume implementation and then properly report the power state to the runtime_pm core. This allows to have a functionning device even if pm_runtime_get_* functions are nops. However, the device power state if CONFIG_PM is enabled is RPM_SUSPENDED, and thus our vc4_hdmi_write() and vc4_hdmi_read() calls in the runtime_pm hooks will now report a warning since the device might not be properly powered. Even more so, we need CONFIG_PM enabled since the previous RaspberryPi have a power domain that needs to be powered up for the HDMI controller to be usable. The previous patch has created a dependency on CONFIG_PM, now we can just assume it's there and only call pm_runtime_resume_and_get() to make sure our device is powered in bind. Link: https://lore.kernel.org/r/20220629123510.1915022-39-maxime@cerno.tech Acked-by: Thomas Zimmermann Tested-by: Stefan Wahren Signed-off-by: Maxime Ripard (cherry picked from commit 53565c28e6af2cef6bbf438c34250135e3564459) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/vc4_hdmi.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 411a87adb0c3..1e5f68704d7d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2972,17 +2972,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_4kp60 = true; } + pm_runtime_enable(dev); + /* - * We need to have the device powered up at this point to call - * our reset hook and for the CEC init. + * We need to have the device powered up at this point to call + * our reset hook and for the CEC init. */ - ret = vc4_hdmi_runtime_resume(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) - goto err_put_ddc; - - pm_runtime_get_noresume(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); + goto err_disable_runtime_pm; if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") || of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) && @@ -3028,6 +3026,7 @@ err_destroy_conn: err_destroy_encoder: drm_encoder_cleanup(encoder); pm_runtime_put_sync(dev); +err_disable_runtime_pm: pm_runtime_disable(dev); err_put_ddc: put_device(&vc4_hdmi->ddc->dev); -- cgit v1.2.3 From cee7db1b0239468b22c295cf04a8c40c34ecd35a Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Mon, 8 Aug 2022 17:53:57 +0900 Subject: docs: kerneldoc-preamble: Test xeCJK.sty before loading On distros whose texlive packaging is fine-grained, texlive-xecjk can be installed/removed independently of other texlive packages. Conditionally loading xeCJK depending only on the existence of the "Noto Sans CJK SC" font might end up in xelatex error of "xeCJK.sty not found!". Improve the situation by testing existence of xeCJK.sty before loading it. This is useful on RHEL 9 and its clone distros where texlive-xecjk doesn't work at the moment due to a missing dependency [1]. "make pdfdocs" for non-CJK contents should work after removing texlive-xecjk. Link: [1] https://bugzilla.redhat.com/show_bug.cgi?id=2086254 Fixes: 398f7abdcb7e ("docs: pdfdocs: Pull LaTeX preamble part out of conf.py") Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: Akira Yokosawa Acked-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c24c2a87-70b2-5342-bcc9-de467940466e@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kerneldoc-preamble.sty | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty index 2a29cbe51396..9707e033c8c4 100644 --- a/Documentation/sphinx/kerneldoc-preamble.sty +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -70,8 +70,16 @@ % Translations have Asian (CJK) characters which are only displayed if % xeCJK is used +\usepackage{ifthen} +\newboolean{enablecjk} +\setboolean{enablecjk}{false} \IfFontExistsTF{Noto Sans CJK SC}{ - % Load xeCJK when CJK font is available + \IfFileExists{xeCJK.sty}{ + \setboolean{enablecjk}{true} + }{} +}{} +\ifthenelse{\boolean{enablecjk}}{ + % Load xeCJK when both the Noto Sans CJK font and xeCJK.sty are available. \usepackage{xeCJK} % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits % its emulation. @@ -196,7 +204,7 @@ % Inactivate CJK after tableofcontents \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{} \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC -}{ % No CJK font found +}{ % Don't enable CJK % Custom macros to on/off CJK and switch CJK fonts (Dummy) \newcommand{\kerneldocCJKon}{} \newcommand{\kerneldocCJKoff}{} @@ -204,14 +212,16 @@ %% and ignore the argument (#1) in their definitions, whole contents of %% CJK chapters can be ignored. \newcommand{\kerneldocBeginSC}[1]{% - %% Put a note on missing CJK fonts in place of zh_CN translation. - \begin{sphinxadmonition}{note}{Note on missing fonts:} + %% Put a note on missing CJK fonts or the xecjk package in place of + %% zh_CN translation. + \begin{sphinxadmonition}{note}{Note on missing fonts and a package:} Translations of Simplified Chinese (zh\_CN), Traditional Chinese (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped - due to the lack of suitable font families. + due to the lack of suitable font families and/or the texlive-xecjk + package. If you want them, please install ``Noto Sans CJK'' font families - by following instructions from + along with the texlive-xecjk package by following instructions from \sphinxcode{./scripts/sphinx-pre-install}. Having optional ``Noto Serif CJK'' font families will improve the looks of those translations. -- cgit v1.2.3 From 7ec9fce4b31604f8415136a4c07f7dc8ad431aec Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 18 Aug 2022 10:41:18 +0300 Subject: ip_tunnel: Respect tunnel key's "flow_flags" in IP tunnels Commit 451ef36bd229 ("ip_tunnels: Add new flow flags field to ip_tunnel_key") added a "flow_flags" member to struct ip_tunnel_key which was later used by the commit in the fixes tag to avoid dropping packets with sources that aren't locally configured when set in bpf_set_tunnel_key(). VXLAN and GENEVE were made to respect this flag, ip tunnels like IPIP and GRE were not. This commit fixes this omission by making ip_tunnel_init_flow() receive the flow flags from the tunnel key in the relevant collect_md paths. Fixes: b8fff748521c ("bpf: Set flow flag to allow any source IP in bpf_tunnel_key") Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann Reviewed-by: Paul Chaignon Link: https://lore.kernel.org/bpf/20220818074118.726639-1-eyal.birger@gmail.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 3 ++- include/net/ip_tunnels.h | 4 +++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 7 ++++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 39904dacf4f0..b3472fb94617 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,7 +423,8 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0); + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, + 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 63fac94f9ace..ced80e2f8b58 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -246,7 +246,8 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, - __u32 mark, __u32 tun_inner_hash) + __u32 mark, __u32 tun_inner_hash, + __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); @@ -263,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; + fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c58e21f724e..f866d6282b2b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -609,7 +609,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos & ~INET_ECN_MASK, dev_net(dev), 0, - skb->mark, skb_get_hash(skb)); + skb->mark, skb_get_hash(skb), key->flow_flags); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e65e948cab9f..019f3b0839c5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -295,7 +295,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), dev_net(dev), - tunnel->parms.link, tunnel->fwmark, 0); + tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -570,7 +570,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - dev_net(dev), 0, skb->mark, skb_get_hash(skb)); + dev_net(dev), 0, skb->mark, skb_get_hash(skb), + key->flow_flags); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -729,7 +730,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), dev_net(dev), tunnel->parms.link, - tunnel->fwmark, skb_get_hash(skb)); + tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; -- cgit v1.2.3 From 7d6620f107bae6ed687ff07668e8e8f855487aa9 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Sat, 13 Aug 2022 21:40:30 +0800 Subject: bpf, cgroup: Fix kernel BUG in purge_effective_progs Syzkaller reported a triggered kernel BUG as follows: ------------[ cut here ]------------ kernel BUG at kernel/bpf/cgroup.c:925! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 194 Comm: detach Not tainted 5.19.0-14184-g69dac8e431af #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__cgroup_bpf_detach+0x1f2/0x2a0 Code: 00 e8 92 60 30 00 84 c0 75 d8 4c 89 e0 31 f6 85 f6 74 19 42 f6 84 28 48 05 00 00 02 75 0e 48 8b 80 c0 00 00 00 48 85 c0 75 e5 <0f> 0b 48 8b 0c5 RSP: 0018:ffffc9000055bdb0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888100ec0800 RCX: ffffc900000f1000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888100ec4578 RBP: 0000000000000000 R08: ffff888100ec0800 R09: 0000000000000040 R10: 0000000000000000 R11: 0000000000000000 R12: ffff888100ec4000 R13: 000000000000000d R14: ffffc90000199000 R15: ffff888100effb00 FS: 00007f68213d2b80(0000) GS:ffff88813bc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f74a0e5850 CR3: 0000000102836000 CR4: 00000000000006e0 Call Trace: cgroup_bpf_prog_detach+0xcc/0x100 __sys_bpf+0x2273/0x2a00 __x64_sys_bpf+0x17/0x20 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f68214dbcb9 Code: 08 44 89 e0 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff8 RSP: 002b:00007ffeb487db68 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 000000000000000b RCX: 00007f68214dbcb9 RDX: 0000000000000090 RSI: 00007ffeb487db70 RDI: 0000000000000009 RBP: 0000000000000003 R08: 0000000000000012 R09: 0000000b00000003 R10: 00007ffeb487db70 R11: 0000000000000246 R12: 00007ffeb487dc20 R13: 0000000000000004 R14: 0000000000000001 R15: 000055f74a1011b0 Modules linked in: ---[ end trace 0000000000000000 ]--- Repetition steps: For the following cgroup tree, root | cg1 | cg2 1. attach prog2 to cg2, and then attach prog1 to cg1, both bpf progs attach type is NONE or OVERRIDE. 2. write 1 to /proc/thread-self/fail-nth for failslab. 3. detach prog1 for cg1, and then kernel BUG occur. Failslab injection will cause kmalloc fail and fall back to purge_effective_progs. The problem is that cg2 have attached another prog, so when go through cg2 layer, iteration will add pos to 1, and subsequent operations will be skipped by the following condition, and cg will meet NULL in the end. `if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))` The NULL cg means no link or prog match, this is as expected, and it's not a bug. So here just skip the no match situation. Fixes: 4c46091ee985 ("bpf: Fix KASAN use-after-free Read in compute_effective_progs") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220813134030.1972696-1-pulehui@huawei.com --- kernel/bpf/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 59b7eb60d5b4..4a400cd63731 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -921,8 +921,10 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, pos++; } } + + /* no link or prog match, skip the cgroup of this layer */ + continue; found: - BUG_ON(!cg); progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); -- cgit v1.2.3 From b5c3aca86d2698c4850b6ee8b341938025d2780c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:37 +0100 Subject: riscv: signal: fix missing prototype warning Fix the warning: arch/riscv/kernel/signal.c:316:27: warning: no previous prototype for function 'do_notify_resume' [-Wmissing-prototypes] asmlinkage __visible void do_notify_resume(struct pt_regs *regs, All other functions in the file are static & none of the existing headers stood out as an obvious location. Create signal.h to hold the declaration. Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") Signed-off-by: Conor Dooley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220814141237.493457-4-mail@conchuod.ie Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/signal.h | 12 ++++++++++++ arch/riscv/kernel/signal.c | 1 + 2 files changed, 13 insertions(+) create mode 100644 arch/riscv/include/asm/signal.h diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h new file mode 100644 index 000000000000..532c29ef0376 --- /dev/null +++ b/arch/riscv/include/asm/signal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SIGNAL_H +#define __ASM_SIGNAL_H + +#include +#include + +asmlinkage __visible +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + +#endif diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 38b05ca6fe66..5a2de6b6f882 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:38 +0100 Subject: riscv: traps: add missing prototype Sparse complains: arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static? The variable is used in entry.S, so declare shadow_stack there alongside SHADOW_OVERFLOW_STACK_SIZE. Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") Signed-off-by: Conor Dooley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/thread_info.h | 2 ++ arch/riscv/kernel/traps.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 78933ac04995..67322f878e0d 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -42,6 +42,8 @@ #ifndef __ASSEMBLY__ +extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; + #include #include diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 39d0f8bba4b4..635e6ec26938 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -20,9 +20,10 @@ #include #include +#include #include #include -#include +#include int show_unhandled_signals = 1; -- cgit v1.2.3 From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Aug 2022 14:28:36 +0200 Subject: x86/nospec: Unwreck the RSB stuffing Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections") made a right mess of the RSB stuffing, rewrite the whole thing to not suck. Thanks to Andrew for the enlightening comment about Post-Barrier RSB things so we can make this code less magical. Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net --- arch/x86/include/asm/nospec-branch.h | 80 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e64fd20778b6..10731ccfed37 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -35,33 +35,44 @@ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * * Google experimented with loop-unrolling and this turned out to be * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 772f; \ -773: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 774f; \ -775: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - add $(BITS_PER_LONG/8) * 2, sp; \ - dec reg; \ - jnz 771b; \ - /* barrier for jnz misprediction */ \ +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; + +/* + * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. + */ +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; #ifdef __ASSEMBLY__ @@ -132,28 +143,15 @@ #endif .endm -.macro ISSUE_UNBALANCED_RET_GUARD - ANNOTATE_INTRA_FUNCTION_CALL - call .Lunbalanced_ret_guard_\@ - int3 -.Lunbalanced_ret_guard_\@: - add $(BITS_PER_LONG/8), %_ASM_SP - lfence -.endm - /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 -.ifb \ftr2 - ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr -.else - ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 -.endif - __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) -.Lunbalanced_\@: - ISSUE_UNBALANCED_RET_GUARD +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(__FILL_ONE_RETURN), \ftr2 + .Lskip_rsb_\@: .endm -- cgit v1.2.3 From 332924973725e8cdcc783c175f68cf7e162cb9e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Aug 2022 13:01:35 +0200 Subject: x86/nospec: Fix i386 RSB stuffing Turns out that i386 doesn't unconditionally have LFENCE, as such the loop in __FILL_RETURN_BUFFER isn't actually speculation safe on such chips. Fixes: ba6e31af2be9 ("x86/speculation: Add LFENCE to RSB fill sequence") Reported-by: Ben Hutchings Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yv9tj9vbQ9nNlXoY@worktop.programming.kicks-ass.net --- arch/x86/include/asm/nospec-branch.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10731ccfed37..c936ce9f0c47 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -50,6 +50,7 @@ * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ +#ifdef CONFIG_X86_64 #define __FILL_RETURN_BUFFER(reg, nr) \ mov $(nr/2), reg; \ 771: \ @@ -60,6 +61,17 @@ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif /* * Stuff a single RSB slot. -- cgit v1.2.3 From ea2aa97ca37a9044ade001aef71dbc06318e8d44 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 19 Aug 2022 15:28:34 +0800 Subject: drm/gem: Fix GEM handle release errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are assuming a one to one mapping between dmabuf and GEM handle when releasing GEM handles. But that is not always true, since we would create extra handles for the GEM obj in cases like gem_open() and getfb{,2}(). A similar issue was reported at: https://lore.kernel.org/all/20211105083308.392156-1-jay.xu@rock-chips.com/ Another problem is that the imported dmabuf might not always have gem_obj->dma_buf set, which would cause leaks in drm_gem_remove_prime_handles(). Let's fix these for now by using handle to find the exact map to remove. Signed-off-by: Jeffy Chen Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220819072834.17888-1-jeffy.chen@rock-chips.com --- drivers/gpu/drm/drm_gem.c | 17 +---------------- drivers/gpu/drm/drm_internal.h | 4 ++-- drivers/gpu/drm/drm_prime.c | 20 ++++++++++++-------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 86d670c71286..ad068865ba20 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev, } EXPORT_SYMBOL(drm_gem_private_object_init); -static void -drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp) -{ - /* - * Note: obj->dma_buf can't disappear as long as we still hold a - * handle reference in obj->handle_count. - */ - mutex_lock(&filp->prime.lock); - if (obj->dma_buf) { - drm_prime_remove_buf_handle_locked(&filp->prime, - obj->dma_buf); - } - mutex_unlock(&filp->prime.lock); -} - /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) if (obj->funcs->close) obj->funcs->close(obj, file_priv); - drm_gem_remove_prime_handles(obj, file_priv); + drm_prime_remove_buf_handle(&file_priv->prime, id); drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 1fbbc19f1ac0..7bb98e6a446d 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv); void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf); +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle); /* drm_drv.c */ struct drm_minor *drm_minor_acquire(unsigned int minor_id); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index a3f180653b8b..eb09e86044c6 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri return -ENOENT; } -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf) +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle) { struct rb_node *rb; - rb = prime_fpriv->dmabufs.rb_node; + mutex_lock(&prime_fpriv->lock); + + rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; - member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); - if (member->dma_buf == dma_buf) { + member = rb_entry(rb, struct drm_prime_member, handle_rb); + if (member->handle == handle) { rb_erase(&member->handle_rb, &prime_fpriv->handles); rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); - dma_buf_put(dma_buf); + dma_buf_put(member->dma_buf); kfree(member); - return; - } else if (member->dma_buf < dma_buf) { + break; + } else if (member->handle < handle) { rb = rb->rb_right; } else { rb = rb->rb_left; } } + + mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) -- cgit v1.2.3 From 04d4ca41809052f6088860fe150dac679e6453d0 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 19 Aug 2022 07:34:40 +0900 Subject: docs/ja_JP/SubmittingPatches: Remove reference to submitting-drivers.rst Reflect changes made in commit 9db370de2780 ("docs: process: remove outdated submitting-drivers.rst") Reported-by: Mauro Carvalho Chehab Signed-off-by: Akira Yokosawa Fixes: 9db370de2780 ("docs: process: remove outdated submitting-drivers.rst") Cc: Tsugikazu Shibata Link: https://lore.kernel.org/r/20220818223440.13530-1-akiyks@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ja_JP/SubmittingPatches | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/translations/ja_JP/SubmittingPatches b/Documentation/translations/ja_JP/SubmittingPatches index 66ce0d8b0526..04deb77b20c6 100644 --- a/Documentation/translations/ja_JP/SubmittingPatches +++ b/Documentation/translations/ja_JP/SubmittingPatches @@ -35,8 +35,7 @@ Linux カーネルに変更を加えたいと思っている個人又は会社 てもらえやすくする提案を集めたものです。 コードを投稿する前に、Documentation/process/submit-checklist.rst の項目リストに目 -を通してチェックしてください。もしあなたがドライバーを投稿しようとし -ているなら、Documentation/process/submitting-drivers.rst にも目を通してください。 +を通してチェックしてください。 -------------------------------------------- セクション1 パッチの作り方と送り方 -- cgit v1.2.3 From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2022 05:56:11 -0700 Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default On the platform with Arch LBR, the HW raw branch type encoding may leak to the perf tool when the SAVE_TYPE option is not set. In the intel_pmu_store_lbr(), the HW raw branch type is stored in lbr_entries[].type. If the SAVE_TYPE option is set, the lbr_entries[].type will be converted into the generic PERF_BR_* type in the intel_pmu_lbr_filter() and exposed to the user tools. But if the SAVE_TYPE option is NOT set by the user, the current perf kernel doesn't clear the field. The HW raw branch type leaks. There are two solutions to fix the issue for the Arch LBR. One is to clear the field if the SAVE_TYPE option is NOT set. The other solution is to unconditionally convert the branch type and expose the generic type to the user tools. The latter is implemented here, because - The branch type is valuable information. I don't see a case where you would not benefit from the branch type. (Stephane Eranian) - Not having the branch type DOES NOT save any space in the branch record (Stephane Eranian) - The Arch LBR HW can retrieve the common branch types from the LBR_INFO. It doesn't require the high overhead SW disassemble. Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") Reported-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com --- arch/x86/events/intel/lbr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 4f70fb6c2c1e..47fca6a7a8bc 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { reg->config = mask; + + /* + * The Arch LBR HW can retrieve the common branch types + * from the LBR_INFO. It doesn't require the high overhead + * SW disassemble. + * Enable the branch type by default for the Arch LBR. + */ + reg->reg |= X86_BR_TYPE_SAVE; return 0; } -- cgit v1.2.3 From 7d3598868aaee05eb738d1c3115616b867e7530a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Aug 2022 19:40:57 +0800 Subject: perf/x86/core: Set pebs_capable and PMU_FL_PEBS_ALL for the Baseline The SDM explicitly states that PEBS Baseline implies Extended PEBS. For cpu model forward compatibility (e.g. on ICX, SPR, ADL), it's safe to stop doing FMS table thing such as setting pebs_capable and PMU_FL_PEBS_ALL since it's already set in the intel_ds_init(). The Goldmont Plus is the only platform which supports extended PEBS but doesn't have Baseline. Keep the status quo. Reported-by: Like Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lkml.kernel.org/r/20220816114057.51307-1-likexu@tencent.com --- arch/x86/events/intel/core.c | 4 ---- arch/x86/events/intel/ds.c | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2db93498ff71..cb98a05ee743 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6291,10 +6291,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; @@ -6337,10 +6335,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba60427caa6d..ac6dd4c96dbc 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2262,6 +2262,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; pebs_qual = "-baseline"; x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { -- cgit v1.2.3 From d4bdb0bebc5ba3299d74f123c782d99cd4e25c49 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 17 Aug 2022 22:46:13 -0700 Subject: perf/x86/intel/ds: Fix precise store latency handling With the existing code in store_latency_data(), the memory operation (mem_op) returned to the user is always OP_LOAD where in fact, it should be OP_STORE. This comes from the fact that the function is simply grabbing the information from a data source map which covers only load accesses. Intel 12th gen CPU offers precise store sampling that captures both the data source and latency. Therefore it can use the data source mapping table but must override the memory operation to reflect stores instead of loads. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220818054613.1548130-1-eranian@google.com --- arch/x86/events/intel/ds.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ac6dd4c96dbc..e5b587499122 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status) static u64 store_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; + union perf_mem_data_src src; u64 val; dse.val = status; @@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status) val |= P(BLK, NA); - return val; + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; } struct pebs_record_core { -- cgit v1.2.3 From cde643ff75bc20c538dfae787ca3b587bab16b50 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Aug 2022 11:44:29 -0700 Subject: perf/x86/intel: Fix pebs event constraints for ADL According to the latest event list, the LOAD_LATENCY PEBS event only works on the GP counter 0 and 1 for ADL and RPL. Update the pebs event constraints table. Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support") Reported-by: Ammy Yi Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220818184429.2355857-1-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e5b587499122..de1f55d51784 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -830,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { struct event_constraint intel_grt_pebs_event_constraints[] = { /* Allow all events as PEBS with no flags */ - INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), EVENT_CONSTRAINT_END }; -- cgit v1.2.3 From 65fac0d54f374625b43a9d6ad1f2c212bd41f518 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 26 Jul 2022 20:22:24 +0800 Subject: blk-mq: fix io hung due to missing commit_rqs Currently, in virtio_scsi, if 'bd->last' is not set to true while dispatching request, such io will stay in driver's queue, and driver will wait for block layer to dispatch more rqs. However, if block layer failed to dispatch more rq, it should trigger commit_rqs to inform driver. There is a problem in blk_mq_try_issue_list_directly() that commit_rqs won't be called: // assume that queue_depth is set to 1, list contains two rq blk_mq_try_issue_list_directly blk_mq_request_issue_directly // dispatch first rq // last is false __blk_mq_try_issue_directly blk_mq_get_dispatch_budget // succeed to get first budget __blk_mq_issue_directly scsi_queue_rq cmd->flags |= SCMD_LAST virtscsi_queuecommand kick = (sc->flags & SCMD_LAST) != 0 // kick is false, first rq won't issue to disk queued++ blk_mq_request_issue_directly // dispatch second rq __blk_mq_try_issue_directly blk_mq_get_dispatch_budget // failed to get second budget ret == BLK_STS_RESOURCE blk_mq_request_bypass_insert // errors is still 0 if (!list_empty(list) || errors && ...) // won't pass, commit_rqs won't be called In this situation, first rq relied on second rq to dispatch, while second rq relied on first rq to complete, thus they will both hung. Fix the problem by also treat 'BLK_STS_*RESOURCE' as 'errors' since it means that request is not queued successfully. Same problem exists in blk_mq_dispatch_rq_list(), 'BLK_STS_*RESOURCE' can't be treated as 'errors' here, fix the problem by calling commit_rqs if queue_rq return 'BLK_STS_*RESOURCE'. Fixes: d666ba98f849 ("blk-mq: add mq_ops->commit_rqs()") Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220726122224.1790882-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3c1e6b6d991d..c96c8c4f751b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1931,7 +1931,8 @@ out: /* If we didn't flush the entire list, we could have told the driver * there was more coming, but that turned out to be a lie. */ - if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued) + if ((!list_empty(list) || errors || needs_resource || + ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued) q->mq_ops->commit_rqs(hctx); /* * Any items that need requeuing? Stuff them into hctx->dispatch, @@ -2660,6 +2661,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, list_del_init(&rq->queuelist); ret = blk_mq_request_issue_directly(rq, list_empty(list)); if (ret != BLK_STS_OK) { + errors++; if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { blk_mq_request_bypass_insert(rq, false, @@ -2667,7 +2669,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; } blk_mq_end_request(rq, ret); - errors++; } else queued++; } -- cgit v1.2.3 From 43ef9db423bdce1df504d4d10e25092d427f04e3 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 14:04:49 +0800 Subject: drm/amdgpu: enable GFXOFF allow control for GC IP v11.0.1 Enable GFXOFF allow control when set the GFX power gating. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 158d87e6805d..f45db80810fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5328,8 +5328,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, break; case IP_VERSION(11, 0, 1): gfx_v11_cntl_pg(adev, enable); - /* TODO: Enable this when GFXOFF is ready */ - // amdgpu_gfx_off_ctrl(adev, enable); + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; -- cgit v1.2.3 From 9d705d7741ae70764f3d6d87e67fad3b5c30ffd0 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 12 Aug 2022 13:38:34 +0800 Subject: drm/amdgpu: Move psp_xgmi_terminate call from amdgpu_xgmi_remove_device to psp_hw_fini V1: The amdgpu_xgmi_remove_device function will send unload command to psp through psp ring to terminate xgmi, but psp ring has been destroyed in psp_hw_fini. V2: 1. Change the commit title. 2. Restore amdgpu_xgmi_remove_device to its original calling location. Move psp_xgmi_terminate call from amdgpu_xgmi_remove_device to psp_hw_fini. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b067ce45d226..1036446abc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2641,6 +2641,9 @@ static int psp_hw_fini(void *handle) psp_rap_terminate(psp); psp_dtm_terminate(psp); psp_hdcp_terminate(psp); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + psp_xgmi_terminate(psp); } psp_asd_terminate(psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1b108d03e785..f2aebbf3fbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) amdgpu_put_xgmi_hive(hive); } - return psp_xgmi_terminate(&adev->psp); + return 0; } static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) -- cgit v1.2.3 From f5994da72ba124a3d0463672fdfbec073e3bb72f Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 12 Aug 2022 14:34:35 +0800 Subject: drm/amdgpu: fix hive reference leak when adding xgmi device Only amdgpu_get_xgmi_hive but no amdgpu_put_xgmi_hive which will leak the hive reference. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e8a0b19b7398..95ce3687902b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2456,12 +2456,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; + amdgpu_put_xgmi_hive(hive); goto init_failed; } /* Drop the early temporary reset domain we created for device */ amdgpu_reset_put_reset_domain(adev->reset_domain); adev->reset_domain = hive->reset_domain; + amdgpu_put_xgmi_hive(hive); } } -- cgit v1.2.3 From c351938350ab9b5e978dede2c321da43de7eb70c Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 18 Aug 2022 10:47:09 +0800 Subject: drm/amdgpu: Check num_gfx_rings for gfx v9_0 rb setup. No need to set up rb when no gfx rings. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6e0f9313a7f..fc9c1043244c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_tiling_mode_table_init(adev); - gfx_v9_0_setup_rb(adev); + if (adev->gfx.num_gfx_rings) + gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); -- cgit v1.2.3 From 06671734881af2bcf7f453661b5f8616e32bb3fc Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 18 Aug 2022 14:13:52 -0400 Subject: drm/amdgpu: Remove the additional kfd pre reset call for sriov The additional call is caused by merge conflict Reviewed-by: Felix Kuehling Signed-off-by: shaoyunl Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 95ce3687902b..f095a2513aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4415,8 +4415,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); - amdgpu_amdkfd_pre_reset(adev); - if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else -- cgit v1.2.3 From 2035590f3d40f227eac453d0c36b5eae85c1cf08 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 18 Aug 2022 10:27:30 -0300 Subject: drm/amd/display: Include missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file amdgpu_dm_plane.c missed the header amdgpu_dm_plane.h, which resulted on the following warning: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1046:5: warning: no previous prototype for 'fill_dc_scaling_info' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1222:6: warning: no previous prototype for 'handle_cursor_update' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:152:6: warning: no previous prototype for 'modifier_has_dcc' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1576:5: warning: no previous prototype for 'amdgpu_dm_plane_init' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:157:10: warning: no previous prototype for 'modifier_gfx9_swizzle_mode' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:752:5: warning: no previous prototype for 'fill_plane_buffer_attributes' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:83:31: warning: no previous prototype for 'amd_get_format_info' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:88:6: warning: no previous prototype for 'fill_blending_from_plane_state' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:992:5: warning: no previous prototype for 'dm_plane_helper_check_state' [-Wmissing-prototypes] Therefore, include the missing header on the file and turn global functions that are not used outside of the file into static functions. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Reported-by: kernel test robot Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 5 +++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h | 8 -------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index fca7cf9dbaee..987bde4dca3d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -34,6 +34,7 @@ #include "dal_asic_id.h" #include "amdgpu_display.h" #include "amdgpu_dm_trace.h" +#include "amdgpu_dm_plane.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -149,12 +150,12 @@ static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_ *size += 1; } -bool modifier_has_dcc(uint64_t modifier) +static bool modifier_has_dcc(uint64_t modifier) { return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); } -unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) +static unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) { if (modifier == DRM_FORMAT_MOD_LINEAR) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 95168c2cfa6f..286981a2dd40 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -36,17 +36,9 @@ int fill_dc_scaling_info(struct amdgpu_device *adev, const struct drm_plane_state *state, struct dc_scaling_info *scaling_info); -void get_min_max_dc_plane_scaling(struct drm_device *dev, - struct drm_framebuffer *fb, - int *min_downscale, int *max_upscale); - int dm_plane_helper_check_state(struct drm_plane_state *state, struct drm_crtc_state *new_crtc_state); -bool modifier_has_dcc(uint64_t modifier); - -unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier); - int fill_plane_buffer_attributes(struct amdgpu_device *adev, const struct amdgpu_framebuffer *afb, const enum surface_pixel_format format, -- cgit v1.2.3 From f461950fdc374a3ada5a63c669d997de4600dffe Mon Sep 17 00:00:00 2001 From: Zhenneng Li Date: Thu, 11 Aug 2022 15:25:40 +0800 Subject: drm/radeon: add a force flush to delay work when radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although radeon card fence and wait for gpu to finish processing current batch rings, there is still a corner case that radeon lockup work queue may not be fully flushed, and meanwhile the radeon_suspend_kms() function has called pci_set_power_state() to put device in D3hot state. Per PCI spec rev 4.0 on 5.3.1.4.1 D3hot State. > Configuration and Message requests are the only TLPs accepted by a Function in > the D3hot state. All other received Requests must be handled as Unsupported Requests, > and all received Completions may optionally be handled as Unexpected Completions. This issue will happen in following logs: Unable to handle kernel paging request at virtual address 00008800e0008010 CPU 0 kworker/0:3(131): Oops 0 pc = [] ra = [] ps = 0000 Tainted: G W pc is at si_gpu_check_soft_reset+0x3c/0x240 ra is at si_dma_is_lockup+0x34/0xd0 v0 = 0000000000000000 t0 = fff08800e0008010 t1 = 0000000000010000 t2 = 0000000000008010 t3 = fff00007e3c00000 t4 = fff00007e3c00258 t5 = 000000000000ffff t6 = 0000000000000001 t7 = fff00007ef078000 s0 = fff00007e3c016e8 s1 = fff00007e3c00000 s2 = fff00007e3c00018 s3 = fff00007e3c00000 s4 = fff00007fff59d80 s5 = 0000000000000000 s6 = fff00007ef07bd98 a0 = fff00007e3c00000 a1 = fff00007e3c016e8 a2 = 0000000000000008 a3 = 0000000000000001 a4 = 8f5c28f5c28f5c29 a5 = ffffffff810f4338 t8 = 0000000000000275 t9 = ffffffff809b66f8 t10 = ff6769c5d964b800 t11= 000000000000b886 pv = ffffffff811bea20 at = 0000000000000000 gp = ffffffff81d89690 sp = 00000000aa814126 Disabling lock debugging due to kernel taint Trace: [] si_dma_is_lockup+0x34/0xd0 [] radeon_fence_check_lockup+0xd0/0x290 [] process_one_work+0x280/0x550 [] worker_thread+0x70/0x7c0 [] worker_thread+0x130/0x7c0 [] kthread+0x200/0x210 [] worker_thread+0x0/0x7c0 [] kthread+0x14c/0x210 [] ret_from_kernel_thread+0x18/0x20 [] kthread+0x0/0x210 Code: ad3e0008 43f0074a ad7e0018 ad9e0020 8c3001e8 40230101 <88210000> 4821ed21 So force lockup work queue flush to fix this problem. Acked-by: Christian König Signed-off-by: Zhenneng Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2b12389f841a..ee0165687239 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1605,6 +1605,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, if (r) { /* delay GPU reset to resume */ radeon_fence_driver_force_completion(rdev, i); + } else { + /* finish executing delayed work */ + flush_delayed_work(&rdev->fence_drv[i].lockup_work); } } -- cgit v1.2.3 From 8f2c96420c6ec3dcb18c8be923e24c6feaa5ccf6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Aug 2022 16:43:49 -0700 Subject: scsi: ufs: core: Reduce the power mode change timeout The current power mode change timeout (180 s) is so large that it can cause a watchdog timer to fire. Reduce the power mode change timeout to 10 seconds. Link: https://lore.kernel.org/r/20220811234401.1957911-1-bvanassche@acm.org Reviewed-by: Stanley Chu Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 6bc679d22927..a202d7d5240d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8741,6 +8741,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, struct scsi_device *sdp; unsigned long flags; int ret, retries; + unsigned long deadline; + int32_t remaining; spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->ufs_device_wlun; @@ -8773,9 +8775,14 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. */ + deadline = jiffies + 10 * HZ; for (retries = 3; retries > 0; --retries) { + ret = -ETIMEDOUT; + remaining = deadline - jiffies; + if (remaining <= 0) + break; ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); + remaining / HZ, 0, 0, RQF_PM, NULL); if (!scsi_status_is_check_condition(ret) || !scsi_sense_valid(&sshdr) || sshdr.sense_key != UNIT_ATTENTION) -- cgit v1.2.3 From fac8e558da9485e13a0ae0488aa0b8a8c307cd34 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 11 Aug 2022 20:12:06 -0500 Subject: scsi: core: Fix passthrough retry counter handling Passthrough users will set the scsi_cmnd->allowed value and were expecting up to $allowed retries. The problem is that before: commit 6aded12b10e0 ("scsi: core: Remove struct scsi_request") we used to set the retries on the scsi_request then copy them over to scsi_cmnd->allowed in scsi_setup_scsi_cmnd. With that patch we now set scsi_cmnd->allowed to 0 in scsi_prepare_cmd and overwrite what the passthrough user set. This moves the allowed initialization to after the blk_rq_is_passthrough() check so it's only done for the non-passthrough path where the ULD init_command will normally set an allowed value it prefers. Link: https://lore.kernel.org/r/20220812011206.9157-1-michael.christie@oracle.com Fixes: 6aded12b10e0 ("scsi: core: Remove struct scsi_request") Reviewed-by: Christoph Hellwig Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ac2e70e2cd96..ef08029a0079 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1548,7 +1548,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) scsi_init_command(sdev, cmd); cmd->eh_eflags = 0; - cmd->allowed = 0; cmd->prot_type = 0; cmd->prot_flags = 0; cmd->submitter = 0; @@ -1599,6 +1598,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req) return ret; } + /* Usually overridden by the ULP */ + cmd->allowed = 0; memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); return scsi_cmd_to_driver(cmd)->init_command(cmd); } -- cgit v1.2.3 From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 19 Aug 2022 16:43:34 +0800 Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry When meeting ftrace trampolines in ORC unwinding, unwinder uses address of ftrace_{regs_}call address to find the ORC entry, which gets next frame at sp+176. If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be sp+8 instead of 176. It makes unwinder skip correct frame and throw warnings such as "wrong direction" or "can't access registers", etc, depending on the content of the incorrect frame address. By adding the base address ftrace_{regs_}caller with the offset *ip - ops->trampoline*, we can get the correct address to find the ORC entry. Also change "caller" to "tramp_addr" to make variable name conform to its content. [ mingo: Clarified the changelog a bit. ] Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines") Signed-off-by: Chen Zhongjin Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (Google) Cc: Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com --- arch/x86/kernel/unwind_orc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 38185aedf7d1..0ea57da92940 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip); static struct orc_entry *orc_ftrace_find(unsigned long ip) { struct ftrace_ops *ops; - unsigned long caller; + unsigned long tramp_addr, offset; ops = ftrace_ops_trampoline(ip); if (!ops) return NULL; + /* Set tramp_addr to the start of the code copied by the trampoline */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) - caller = (unsigned long)ftrace_regs_call; + tramp_addr = (unsigned long)ftrace_regs_caller; else - caller = (unsigned long)ftrace_call; + tramp_addr = (unsigned long)ftrace_caller; + + /* Now place tramp_addr to the location within the trampoline ip is at */ + offset = ip - ops->trampoline; + tramp_addr += offset; /* Prevent unlikely recursion */ - if (ip == caller) + if (ip == tramp_addr) return NULL; - return orc_find(caller); + return orc_find(tramp_addr); } #else static struct orc_entry *orc_ftrace_find(unsigned long ip) -- cgit v1.2.3 From a8d302a0b77057568350fe0123e639d02dba0745 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 21 Aug 2022 17:59:11 +0200 Subject: ALSA: memalloc: Revive x86-specific WC page allocations again We dropped the x86-specific hack for WC-page allocations with a hope that the standard dma_alloc_wc() works nowadays. Alas, it doesn't, and we need to take back some workaround again, but in a different form, as the previous one was broken for some platforms. This patch re-introduces the x86-specific WC-page allocations, but it uses rather the manual page allocations instead of dma_alloc_coherent(). The use of dma_alloc_coherent() was also a potential problem in the recent addition of the fallback allocation for noncontig pages, and this patch eliminates both at once. Fixes: 9882d63bea14 ("ALSA: memalloc: Drop x86-specific hack for WC allocations") Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216363 Link: https://lore.kernel.org/r/20220821155911.10715-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 87 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index d3885cb02270..b665ac66ccbe 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -20,6 +20,13 @@ static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab); +#ifdef CONFIG_SND_DMA_SGBUF +static void *do_alloc_fallback_pages(struct device *dev, size_t size, + dma_addr_t *addr, bool wc); +static void do_free_fallback_pages(void *p, size_t size, bool wc); +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); +#endif + /* a cast to gfp flag from the dev pointer; for CONTINUOUS and VMALLOC types */ static inline gfp_t snd_mem_get_gfp_flags(const struct snd_dma_buffer *dmab, gfp_t default_gfp) @@ -277,16 +284,21 @@ EXPORT_SYMBOL(snd_sgbuf_get_chunk_size); /* * Continuous pages allocator */ -static void *snd_dma_continuous_alloc(struct snd_dma_buffer *dmab, size_t size) +static void *do_alloc_pages(size_t size, dma_addr_t *addr, gfp_t gfp) { - gfp_t gfp = snd_mem_get_gfp_flags(dmab, GFP_KERNEL); void *p = alloc_pages_exact(size, gfp); if (p) - dmab->addr = page_to_phys(virt_to_page(p)); + *addr = page_to_phys(virt_to_page(p)); return p; } +static void *snd_dma_continuous_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + return do_alloc_pages(size, &dmab->addr, + snd_mem_get_gfp_flags(dmab, GFP_KERNEL)); +} + static void snd_dma_continuous_free(struct snd_dma_buffer *dmab) { free_pages_exact(dmab->area, dmab->bytes); @@ -463,6 +475,25 @@ static const struct snd_malloc_ops snd_dma_dev_ops = { /* * Write-combined pages */ +/* x86-specific allocations */ +#ifdef CONFIG_SND_DMA_SGBUF +static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + return do_alloc_fallback_pages(dmab->dev.dev, size, &dmab->addr, true); +} + +static void snd_dma_wc_free(struct snd_dma_buffer *dmab) +{ + do_free_fallback_pages(dmab->area, dmab->bytes, true); +} + +static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, + struct vm_area_struct *area) +{ + area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); + return snd_dma_continuous_mmap(dmab, area); +} +#else static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) { return dma_alloc_wc(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP); @@ -479,6 +510,7 @@ static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, return dma_mmap_wc(dmab->dev.dev, area, dmab->area, dmab->addr, dmab->bytes); } +#endif /* CONFIG_SND_DMA_SGBUF */ static const struct snd_malloc_ops snd_dma_wc_ops = { .alloc = snd_dma_wc_alloc, @@ -486,10 +518,6 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { .mmap = snd_dma_wc_mmap, }; -#ifdef CONFIG_SND_DMA_SGBUF -static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); -#endif - /* * Non-contiguous pages allocator */ @@ -669,6 +697,37 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; +/* manual page allocations with wc setup */ +static void *do_alloc_fallback_pages(struct device *dev, size_t size, + dma_addr_t *addr, bool wc) +{ + gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + void *p; + + again: + p = do_alloc_pages(size, addr, gfp); + if (!p || (*addr + size - 1) & ~dev->coherent_dma_mask) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && !(gfp & GFP_DMA32)) { + gfp |= GFP_DMA32; + goto again; + } + if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + if (p && wc) + set_memory_wc((unsigned long)(p), size >> PAGE_SHIFT); + return p; +} + +static void do_free_fallback_pages(void *p, size_t size, bool wc) +{ + if (wc) + set_memory_wb((unsigned long)(p), size >> PAGE_SHIFT); + free_pages_exact(p, size); +} + /* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback { size_t count; @@ -679,14 +738,11 @@ struct snd_dma_sg_fallback { static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) { + bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; size_t i; - if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) - set_pages_array_wb(sgbuf->pages, sgbuf->count); for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) - dma_free_coherent(dmab->dev.dev, PAGE_SIZE, - page_address(sgbuf->pages[i]), - sgbuf->addrs[i]); + do_free_fallback_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc); kvfree(sgbuf->pages); kvfree(sgbuf->addrs); kfree(sgbuf); @@ -698,6 +754,7 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) struct page **pages; size_t i, count; void *p; + bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) @@ -712,15 +769,13 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) goto error; for (i = 0; i < count; sgbuf->count++, i++) { - p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE, - &sgbuf->addrs[i], DEFAULT_GFP); + p = do_alloc_fallback_pages(dmab->dev.dev, PAGE_SIZE, + &sgbuf->addrs[i], wc); if (!p) goto error; sgbuf->pages[i] = virt_to_page(p); } - if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) - set_pages_array_wc(pages, count); p = vmap(pages, count, VM_MAP, PAGE_KERNEL); if (!p) goto error; -- cgit v1.2.3 From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 16 Aug 2022 16:42:56 -0500 Subject: btrfs: check if root is readonly while setting security xattr For a filesystem which has btrfs read-only property set to true, all write operations including xattr should be denied. However, security xattr can still be changed even if btrfs ro property is true. This happens because xattr_permission() does not have any restrictions on security.*, system.* and in some cases trusted.* from VFS and the decision is left to the underlying filesystem. See comments in xattr_permission() for more details. This patch checks if the root is read-only before performing the set xattr operation. Testcase: DEV=/dev/vdb MNT=/mnt mkfs.btrfs -f $DEV mount $DEV $MNT echo "file one" > $MNT/f1 setfattr -n "security.one" -v 2 $MNT/f1 btrfs property set /mnt ro true setfattr -n "security.one" -v 1 $MNT/f1 umount $MNT CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 7421abcf325a..5bb8d8c86311 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, const char *name, const void *buffer, size_t size, int flags) { + if (btrfs_root_readonly(BTRFS_I(inode)->root)) + return -EROFS; + name = xattr_full_name(handler, name); return btrfs_setxattr_trans(inode, name, buffer, size, flags); } -- cgit v1.2.3 From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Mon, 15 Aug 2022 23:16:06 +0800 Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path() In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if the path is invalid. In this case, btrfs_get_dev_args_from_path() returns directly without freeing args->uuid and args->fsid allocated before, which causes memory leak. To fix these possible leaks, when btrfs_get_bdev_and_sb() fails, btrfs_put_dev_args_from_path() is called to clean up the memory. Reported-by: TOTE Robot Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper") CC: stable@vger.kernel.org # 5.16 Reviewed-by: Boris Burkov Signed-off-by: Zixuan Fu Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 272901514b0c..064ab2a79c80 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, &bdev, &disk_super); - if (ret) + if (ret) { + btrfs_put_dev_args_from_path(args); return ret; + } + args->devid = btrfs_stack_device_id(&disk_super->dev_item); memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) -- cgit v1.2.3 From e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Aug 2022 12:18:37 +0100 Subject: btrfs: update generation of hole file extent item when merging holes When punching a hole into a file range that is adjacent with a hole and we are not using the no-holes feature, we expand the range of the adjacent file extent item that represents a hole, to save metadata space. However we don't update the generation of hole file extent item, which means a full fsync will not log that file extent item if the fsync happens in a later transaction (since commit 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync")). For example, if we do this: $ mkfs.btrfs -f -O ^no-holes /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xab 2M 2M" /mnt/foobar $ sync We end up with 2 file extent items in our file: 1) One that represents the hole for the file range [0, 2M), with a generation of 7; 2) Another one that represents an extent covering the range [2M, 4M). After that if we do the following: $ xfs_io -c "fpunch 2M 2M" /mnt/foobar We end up with a single file extent item in the file, which represents a hole for the range [0, 4M) and with a generation of 7 - because we end dropping the data extent for range [2M, 4M) and then update the file extent item that represented the hole at [0, 2M), by increasing length from 2M to 4M. Then doing a full fsync and power failing: $ xfs_io -c "fsync" /mnt/foobar will result in the full fsync not logging the file extent item that represents the hole for the range [0, 4M), because its generation is 7, which is lower than the generation of the current transaction (8). As a consequence, after mounting again the filesystem (after log replay), the region [2M, 4M) does not have a hole, it still points to the previous data extent. So fix this by always updating the generation of existing file extent items representing holes when we merge/expand them. This solves the problem and it's the same approach as when we merge prealloc extents that got written (at btrfs_mark_extent_written()). Setting the generation to the current transaction's generation is also what we do when merging the new hole extent map with the previous one or the next one. A test case for fstests, covering both cases of hole file extent item merging (to the left and to the right), will be sent soon. Fixes: 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync") CC: stable@vger.kernel.org # 5.18+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 687fb372093f..470421955de4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2499,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } @@ -2515,6 +2516,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } -- cgit v1.2.3 From 4a445b7b6178d88956192c0202463063f52e8667 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 13 Aug 2022 16:06:53 +0800 Subject: btrfs: don't merge pages into bio if their page offset is not contiguous [BUG] Zygo reported on latest development branch, he could hit ASSERT()/BUG_ON() caused crash when doing RAID5 recovery (intentionally corrupt one disk, and let btrfs to recover the data during read/scrub). And The following minimal reproducer can cause extent state leakage at rmmod time: mkfs.btrfs -f -d raid5 -m raid5 $dev1 $dev2 $dev3 -b 1G > /dev/null mount $dev1 $mnt fsstress -w -d $mnt -n 25 -s 1660807876 sync fssum -A -f -w /tmp/fssum.saved $mnt umount $mnt # Wipe the dev1 but keeps its super block xfs_io -c "pwrite -S 0x0 1m 1023m" $dev1 mount $dev1 $mnt fssum -r /tmp/fssum.saved $mnt > /dev/null umount $mnt rmmod btrfs This will lead to the following extent states leakage: BTRFS: state leak: start 499712 end 503807 state 5 in tree 1 refs 1 BTRFS: state leak: start 495616 end 499711 state 5 in tree 1 refs 1 BTRFS: state leak: start 491520 end 495615 state 5 in tree 1 refs 1 BTRFS: state leak: start 487424 end 491519 state 5 in tree 1 refs 1 BTRFS: state leak: start 483328 end 487423 state 5 in tree 1 refs 1 BTRFS: state leak: start 479232 end 483327 state 5 in tree 1 refs 1 BTRFS: state leak: start 475136 end 479231 state 5 in tree 1 refs 1 BTRFS: state leak: start 471040 end 475135 state 5 in tree 1 refs 1 [CAUSE] Since commit 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector"), we always use btrfs_bio->file_offset to determine the file offset of a page. But that usage assume that, one bio has all its page having a continuous page offsets. Unfortunately that's not true, btrfs only requires the logical bytenr contiguous when assembling its bios. From above script, we have one bio looks like this: fssum-27671 submit_one_bio: bio logical=217739264 len=36864 fssum-27671 submit_one_bio: r/i=5/261 page_offset=466944 <<< fssum-27671 submit_one_bio: r/i=5/261 page_offset=724992 <<< fssum-27671 submit_one_bio: r/i=5/261 page_offset=729088 fssum-27671 submit_one_bio: r/i=5/261 page_offset=733184 fssum-27671 submit_one_bio: r/i=5/261 page_offset=737280 fssum-27671 submit_one_bio: r/i=5/261 page_offset=741376 fssum-27671 submit_one_bio: r/i=5/261 page_offset=745472 fssum-27671 submit_one_bio: r/i=5/261 page_offset=749568 fssum-27671 submit_one_bio: r/i=5/261 page_offset=753664 Note that the 1st and the 2nd page has non-contiguous page offsets. This means, at repair time, we will have completely wrong file offset passed in: kworker/u32:2-19927 btrfs_repair_one_sector: r/i=5/261 page_off=729088 file_off=475136 bio_offset=8192 Since the file offset is incorrect, we latter incorrectly set the extent states, and no way to really release them. Thus later it causes the leakage. In fact, this can be even worse, since the file offset is incorrect, we can hit cases like the incorrect file offset belongs to a HOLE, and later cause btrfs_num_copies() to trigger error, finally hit BUG_ON()/ASSERT() later. [FIX] Add an extra condition in btrfs_bio_add_page() for uncompressed IO. Now we will have more strict requirement for bio pages: - They should all have the same mapping (the mapping check is already implied by the call chain) - Their logical bytenr should be adjacent This is the same as the old condition. - Their page_offset() (file offset) should be adjacent This is the new check. This would result a slightly increased amount of bios from btrfs (needs holes and inside the same stripe boundary to trigger). But this would greatly reduce the confusion, as it's pretty common to assume a btrfs bio would only contain continuous page cache. Later we may need extra cleanups, as we no longer needs to handle gaps between page offsets in endio functions. Currently this should be the minimal patch to fix commit 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector"). Reported-by: Zygo Blaxell Fixes: 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector") Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e8e936a8a1e..eb17e5bb9235 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, u32 bio_size = bio->bi_iter.bi_size; u32 real_size; const sector_t sector = disk_bytenr >> SECTOR_SHIFT; - bool contig; + bool contig = false; int ret; ASSERT(bio); @@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, if (bio_ctrl->compress_type != compress_type) return 0; - if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) + + if (bio->bi_iter.bi_size == 0) { + /* We can always add a page into an empty bio. */ + contig = true; + } else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) { + struct bio_vec *bvec = bio_last_bvec_all(bio); + + /* + * The contig check requires the following conditions to be met: + * 1) The pages are belonging to the same inode + * This is implied by the call chain. + * + * 2) The range has adjacent logical bytenr + * + * 3) The range has adjacent file offset + * This is required for the usage of btrfs_bio->file_offset. + */ + if (bio_end_sector(bio) == sector && + page_offset(bvec->bv_page) + bvec->bv_offset + + bvec->bv_len == page_offset(page) + pg_offset) + contig = true; + } else { + /* + * For compression, all IO should have its logical bytenr + * set to the starting bytenr of the compressed extent. + */ contig = bio->bi_iter.bi_sector == sector; - else - contig = bio_end_sector(bio) == sector; + } + if (!contig) return 0; -- cgit v1.2.3 From 79d3d1d12e6f22c904195f9356069859e2595f00 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Aug 2022 11:53:39 -0400 Subject: btrfs: don't allow large NOWAIT direct reads Dylan and Jens reported a problem where they had an io_uring test that was returning short reads, and bisected it to ee5b46a353af ("btrfs: increase direct io read size limit to 256 sectors"). The root cause is their test was doing larger reads via io_uring with NOWAIT and async. This was triggering a page fault during the direct read, however the first page was able to work just fine and thus we submitted a 4k read for a larger iocb. Btrfs allows for partial IO's in this case specifically because we don't allow page faults, and thus we'll attempt to do any io that we can, submit what we could, come back and fault in the rest of the range and try to do the remaining IO. However for !is_sync_kiocb() we'll call ->ki_complete() as soon as the partial dio is done, which is incorrect. In the sync case we can exit the iomap code, submit more io's, and return with the amount of IO we were able to complete successfully. We were always doing short reads in this case, but for NOWAIT we were getting saved by the fact that we were limiting direct reads to sectorsize, and if we were larger than that we would return EAGAIN. Fix the regression by simply returning EAGAIN in the NOWAIT case with larger reads, that way io_uring can retry and get the larger IO and have the fault logic handle everything properly. This still leaves the AIO short read case, but that existed before this change. The way to properly fix this would be to handle partial iocb completions, but that's a lot of work, for now deal with the regression in the most straightforward way possible. Reported-by: Dylan Yudaken Fixes: ee5b46a353af ("btrfs: increase direct io read size limit to 256 sectors") Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ecc5fa3343fc..bea0adf28735 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7693,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, const u64 data_alloc_len = length; bool unlock_extents = false; + /* + * We could potentially fault if we have a buffer > PAGE_SIZE, and if + * we're NOWAIT we may submit a bio for a partial range and return + * EIOCBQUEUED, which would result in an errant short read. + * + * The best way to handle this would be to allow for partial completions + * of iocb's, so we could submit the partial bio, return and fault in + * the rest of the pages, and then submit the io for the rest of the + * range. However we don't have that currently, so simply return + * -EAGAIN at this point so that the normal path is used. + */ + if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE) + return -EAGAIN; + /* * Cap the size of reads to that usually seen in buffered I/O as we need * to allocate a contiguous array for the checksums. -- cgit v1.2.3 From 785538bfdd682c8e962341d585f9b88262a0475e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Aug 2022 10:26:38 -0700 Subject: scsi: sd: Revert "Rework asynchronous resume support" Although commit 88f1669019bd ("scsi: sd: Rework asynchronous resume support") eliminates a delay for some ATA disks after resume, it causes resume of ATA disks to fail on other setups. See also: * "Resume process hangs for 5-6 seconds starting sometime in 5.16" (https://bugzilla.kernel.org/show_bug.cgi?id=215880). * Geert's regression report (https://lore.kernel.org/linux-scsi/alpine.DEB.2.22.394.2207191125130.1006766@ramsan.of.borg/). This is what I understand about this issue: * During resume, ata_port_pm_resume() starts the SCSI error handler. This changes the SCSI host state into SHOST_RECOVERY and causes scsi_queue_rq() to return BLK_STS_RESOURCE. * sd_resume() calls sd_start_stop_device() for ATA devices. That function in turn calls sd_submit_start() which tries to submit a START STOP UNIT command. That command can only be submitted after the SCSI error handler has changed the SCSI host state back to SHOST_RUNNING. * The SCSI error handler runs on its own thread and calls schedule_work(&(ap->scsi_rescan_task)). That causes ata_scsi_dev_rescan() to be called from the context of a kernel workqueue. That call hangs in blk_mq_get_tag(). I'm not sure why - maybe because all available tags have been allocated by sd_submit_start() calls (this is a guess). Link: https://lore.kernel.org/r/20220816172638.538734-1-bvanassche@acm.org Fixes: 88f1669019bd ("scsi: sd: Rework asynchronous resume support") Cc: Damien Le Moal Cc: Hannes Reinecke Cc: Geert Uytterhoeven Cc: gzhqyz@gmail.com Reported-by: Geert Uytterhoeven Reported-by: gzhqyz@gmail.com Reported-and-tested-by: Vlastimil Babka Tested-by: John Garry Tested-by: Hans de Goede Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 84 ++++++++++++------------------------------------------- drivers/scsi/sd.h | 5 ---- 2 files changed, 18 insertions(+), 71 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8f79fa6318fe..eb76ba055021 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -103,7 +103,6 @@ static void sd_config_discard(struct scsi_disk *, unsigned int); static void sd_config_write_same(struct scsi_disk *); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); -static void sd_start_done_work(struct work_struct *work); static int sd_probe(struct device *); static int sd_remove(struct device *); static void sd_shutdown(struct device *); @@ -3471,7 +3470,6 @@ static int sd_probe(struct device *dev) sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); - INIT_WORK(&sdkp->start_done_work, sd_start_done_work); if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) @@ -3594,69 +3592,12 @@ static void scsi_disk_release(struct device *dev) kfree(sdkp); } -/* Process sense data after a START command finished. */ -static void sd_start_done_work(struct work_struct *work) -{ - struct scsi_disk *sdkp = container_of(work, typeof(*sdkp), - start_done_work); - struct scsi_sense_hdr sshdr; - int res = sdkp->start_result; - - if (res == 0) - return; - - sd_print_result(sdkp, "Start/Stop Unit failed", res); - - if (res < 0) - return; - - if (scsi_normalize_sense(sdkp->start_sense_buffer, - sdkp->start_sense_len, &sshdr)) - sd_print_sense_hdr(sdkp, &sshdr); -} - -/* A START command finished. May be called from interrupt context. */ -static void sd_start_done(struct request *req, blk_status_t status) -{ - const struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); - struct scsi_disk *sdkp = scsi_disk(req->q->disk); - - sdkp->start_result = scmd->result; - WARN_ON_ONCE(scmd->sense_len > SCSI_SENSE_BUFFERSIZE); - sdkp->start_sense_len = scmd->sense_len; - memcpy(sdkp->start_sense_buffer, scmd->sense_buffer, - ARRAY_SIZE(sdkp->start_sense_buffer)); - WARN_ON_ONCE(!schedule_work(&sdkp->start_done_work)); -} - -/* Submit a START command asynchronously. */ -static int sd_submit_start(struct scsi_disk *sdkp, u8 cmd[], u8 cmd_len) -{ - struct scsi_device *sdev = sdkp->device; - struct request_queue *q = sdev->request_queue; - struct request *req; - struct scsi_cmnd *scmd; - - req = scsi_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PM); - if (IS_ERR(req)) - return PTR_ERR(req); - - scmd = blk_mq_rq_to_pdu(req); - scmd->cmd_len = cmd_len; - memcpy(scmd->cmnd, cmd, cmd_len); - scmd->allowed = sdkp->max_retries; - req->timeout = SD_TIMEOUT; - req->rq_flags |= RQF_PM | RQF_QUIET; - req->end_io = sd_start_done; - blk_execute_rq_nowait(req, /*at_head=*/true); - - return 0; -} - static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ + struct scsi_sense_hdr sshdr; struct scsi_device *sdp = sdkp->device; + int res; if (start) cmd[4] |= 1; /* START */ @@ -3667,10 +3608,23 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) if (!scsi_device_online(sdp)) return -ENODEV; - /* Wait until processing of sense data has finished. */ - flush_work(&sdkp->start_done_work); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL); + if (res) { + sd_print_result(sdkp, "Start/Stop Unit failed", res); + if (res > 0 && scsi_sense_valid(&sshdr)) { + sd_print_sense_hdr(sdkp, &sshdr); + /* 0x3a is medium not present */ + if (sshdr.asc == 0x3a) + res = 0; + } + } - return sd_submit_start(sdkp, cmd, sizeof(cmd)); + /* SCSI error codes must not go to the generic layer */ + if (res) + return -EIO; + + return 0; } /* @@ -3697,8 +3651,6 @@ static void sd_shutdown(struct device *dev) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } - - flush_work(&sdkp->start_done_work); } static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index b89187761d61..5eea762f84d1 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -150,11 +150,6 @@ struct scsi_disk { unsigned urswrz : 1; unsigned security : 1; unsigned ignore_medium_access_errors : 1; - - int start_result; - u32 start_sense_len; - u8 start_sense_buffer[SCSI_SENSE_BUFFERSIZE]; - struct work_struct start_done_work; }; #define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev) -- cgit v1.2.3 From c919c164fc87bcca8e80b3b9224492fa5b6455ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 01:01:36 -0500 Subject: smb3: missing inode locks in zero range smb3 fallocate zero range was not grabbing the inode or filemap_invalidate locks so could have race with pagemap reinstantiating the page. Cc: stable@vger.kernel.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96f3b0573606..a6fe54281fd3 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3307,26 +3307,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, return pntsd; } +static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, unsigned int xid) +{ + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + 0, NULL, NULL); +} + static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len, bool keep_size) { struct cifs_ses *ses = tcon->ses; - struct inode *inode; - struct cifsInodeInfo *cifsi; + struct inode *inode = file_inode(file); + struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - struct file_zero_data_information fsctl_buf; long rc; unsigned int xid; __le64 eof; xid = get_xid(); - inode = d_inode(cfile->dentry); - cifsi = CIFS_I(inode); - trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + /* * We zero the range through ioctl, so we need remove the page caches * first, otherwise the data may be inconsistent with the server. @@ -3334,26 +3351,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ - if (!CIFS_CACHE_READ(cifsi)) - if (keep_size == false) { - rc = -EOPNOTSUPP; - trace_smb3_zero_err(xid, cfile->fid.persistent_fid, - tcon->tid, ses->Suid, offset, len, rc); - free_xid(xid); - return rc; - } - - cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); - - fsctl_buf.FileOffset = cpu_to_le64(offset); - fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + rc = -EOPNOTSUPP; + if (keep_size == false && !CIFS_CACHE_READ(cifsi)) + goto zero_range_exit; - rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - (char *)&fsctl_buf, - sizeof(struct file_zero_data_information), - 0, NULL, NULL); - if (rc) + rc = smb3_zero_data(file, tcon, offset, len, xid); + if (rc < 0) goto zero_range_exit; /* @@ -3366,6 +3369,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, } zero_range_exit: + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); free_xid(xid); if (rc) trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid, -- cgit v1.2.3 From afe7116f6d3b888778ed6d95e3cf724767b9aedf Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Aug 2022 11:42:24 +0800 Subject: ieee802154/adf7242: defer destroy_workqueue call There is a possible race condition (use-after-free) like below (FREE) | (USE) adf7242_remove | adf7242_channel cancel_delayed_work_sync | destroy_workqueue (1) | adf7242_cmd_rx | mod_delayed_work (2) | The root cause for this race is that the upper layer (ieee802154) is unaware of this detaching event and the function adf7242_channel can be called without any checks. To fix this, we can add a flag write at the beginning of adf7242_remove and add flag check in adf7242_channel. Or we can just defer the destructive operation like other commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") which let the ieee802154_unregister_hw() to handle the synchronization. This patch takes the second option. Fixes: 58e9683d1475 ("net: ieee802154: adf7242: Fix OCL calibration runs") Signed-off-by: Lin Ma Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20220808034224.12642-1-linma@zju.edu.cn Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 6afdf1622944..5cf218c674a5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1310,10 +1310,11 @@ static void adf7242_remove(struct spi_device *spi) debugfs_remove_recursive(lp->debugfs_root); + ieee802154_unregister_hw(lp->hw); + cancel_delayed_work_sync(&lp->work); destroy_workqueue(lp->wqueue); - ieee802154_unregister_hw(lp->hw); mutex_destroy(&lp->bmux); ieee802154_free_hw(lp->hw); } -- cgit v1.2.3 From b5a990209d72615e3cac2b3b0d8ddd445c020cf5 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Fri, 8 Jul 2022 23:15:38 +0800 Subject: net/ieee802154: fix repeated words in comments Delete the redundant word 'was'. Signed-off-by: Jilin Yuan Link: https://lore.kernel.org/r/20220708151538.51483-1-yuanjilin@cdjrlc.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 42c0b451088d..450b16ad40a4 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2293,7 +2293,7 @@ static int ca8210_set_csma_params( * @retries: Number of retries * * Sets the number of times to retry a transmission if no acknowledgment was - * was received from the other end when one was requested. + * received from the other end when one was requested. * * Return: 0 or linux error code */ -- cgit v1.2.3 From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 02:10:56 -0500 Subject: smb3: missing inode locks in punch hole smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate locks so could have race with pagemap reinstantiating the page. Cc: stable@vger.kernel.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a6fe54281fd3..4810bd62266a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3384,7 +3384,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len) { - struct inode *inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; long rc; @@ -3393,14 +3393,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode = d_inode(cfile->dentry); - + inode_lock(inode); /* Need to make file sparse, if not already, before freeing range. */ /* Consider adding equivalent for compressed since it could also work */ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { rc = -EOPNOTSUPP; - free_xid(xid); - return rc; + goto out; } filemap_invalidate_lock(inode->i_mapping); @@ -3420,8 +3418,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); - free_xid(xid); filemap_invalidate_unlock(inode->i_mapping); +out: + inode_unlock(inode); + free_xid(xid); return rc; } -- cgit v1.2.3 From 0b52f76351bf87f35b62195fca874b6055125bc6 Mon Sep 17 00:00:00 2001 From: Martin Liška Date: Thu, 18 Aug 2022 22:31:57 +0200 Subject: docs/arm64: elf_hwcaps: unify newlines in HWCAP lists Unify horizontal spacing (remove extra newlines) which are sensitive to visual presentation by Sphinx. Fixes: 5e64b862c482 ("arm64/sme: Basic enumeration support") Signed-off-by: Martin Liska Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/84e3d6cc-75cf-d6f3-9bb8-be02075aaf6d@suse.cz Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 52b75a25c205..311021f2e560 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -242,44 +242,34 @@ HWCAP2_MTE3 by Documentation/arm64/memory-tagging-extension.rst. HWCAP2_SME - Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described by Documentation/arm64/sme.rst. HWCAP2_SME_I16I64 - Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111. HWCAP2_SME_F64F64 - Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1. HWCAP2_SME_I8I32 - Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111. HWCAP2_SME_F16F32 - Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1. HWCAP2_SME_B16F32 - Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1. HWCAP2_SME_F32F32 - Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1. HWCAP2_SME_FA64 - Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1. HWCAP2_WFXT - Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010. HWCAP2_EBF16 - Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010. 4. Unused AT_HWCAP bits -- cgit v1.2.3 From 729a916599eaf13df66ae5404f5489f38518ea8c Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Thu, 21 Jul 2022 11:05:31 +0800 Subject: arm64: Fix comment typo Replace wrong 'FIQ EL1h' comment with 'FIQ EL1t'. Signed-off-by: Kuan-Ying Lee Link: https://lore.kernel.org/r/20220721030531.21234-1-Kuan-Ying.Lee@mediatek.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 254fe31c03a0..2d73b3e793b2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -502,7 +502,7 @@ tsk .req x28 // current thread_info SYM_CODE_START(vectors) kernel_ventry 1, t, 64, sync // Synchronous EL1t kernel_ventry 1, t, 64, irq // IRQ EL1t - kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, fiq // FIQ EL1t kernel_ventry 1, t, 64, error // Error EL1t kernel_ventry 1, h, 64, sync // Synchronous EL1h -- cgit v1.2.3 From 2e8cff0a0eee87b27f0cf87ad8310eb41b5886ab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Aug 2022 16:40:22 +0100 Subject: arm64: fix rodata=full On arm64, "rodata=full" has been suppored (but not documented) since commit: c55191e96caa9d78 ("arm64: mm: apply r/o permissions of VM areas to its linear alias as well") As it's necessary to determine the rodata configuration early during boot, arm64 has an early_param() handler for this, whereas init/main.c has a __setup() handler which is run later. Unfortunately, this split meant that since commit: f9a40b0890658330 ("init/main.c: return 1 from handled __setup() functions") ... passing "rodata=full" would result in a spurious warning from the __setup() handler (though RO permissions would be configured appropriately). Further, "rodata=full" has been broken since commit: 0d6ea3ac94ca77c5 ("lib/kstrtox.c: add "false"/"true" support to kstrtobool()") ... which caused strtobool() to parse "full" as false (in addition to many other values not documented for the "rodata=" kernel parameter. This patch fixes this breakage by: * Moving the core parameter parser to an __early_param(), such that it is available early. * Adding an (optional) arch hook which arm64 can use to parse "full". * Updating the documentation to mention that "full" is valid for arm64. * Having the core parameter parser handle "on" and "off" explicitly, such that any undocumented values (e.g. typos such as "ful") are reported as errors rather than being silently accepted. Note that __setup() and early_param() have opposite conventions for their return values, where __setup() uses 1 to indicate a parameter was handled and early_param() uses 0 to indicate a parameter was handled. Fixes: f9a40b089065 ("init/main.c: return 1 from handled __setup() functions") Fixes: 0d6ea3ac94ca ("lib/kstrtox.c: add "false"/"true" support to kstrtobool()") Signed-off-by: Mark Rutland Cc: Andy Shevchenko Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Jagdish Gediya Cc: Matthew Wilcox Cc: Randy Dunlap Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220817154022.3974645-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/arm64/include/asm/setup.h | 17 +++++++++++++++++ arch/arm64/mm/mmu.c | 18 ------------------ init/main.c | 18 +++++++++++++++--- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d7f30902fda0..426fa892d311 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5331,6 +5331,8 @@ rodata= [KNL] on Mark read-only kernel memory as read-only (default). off Leave read-only kernel memory writable for debugging. + full Mark read-only kernel memory and aliases as read-only + [arm64] rockchip.usb_uart Enable the uart passthrough on the designated usb port diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index 6437df661700..f4af547ef54c 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -3,6 +3,8 @@ #ifndef __ARM64_ASM_SETUP_H #define __ARM64_ASM_SETUP_H +#include + #include void *get_early_fdt_ptr(void); @@ -14,4 +16,19 @@ void early_fdt_map(u64 dt_phys); extern phys_addr_t __fdt_pointer __initdata; extern u64 __cacheline_aligned boot_args[4]; +static inline bool arch_parse_debug_rodata(char *arg) +{ + extern bool rodata_enabled; + extern bool rodata_full; + + if (arg && !strcmp(arg, "full")) { + rodata_enabled = true; + rodata_full = true; + return true; + } + + return false; +} +#define arch_parse_debug_rodata arch_parse_debug_rodata + #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index db7c4e6ae57b..e7ad44585f40 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -642,24 +642,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } -static int __init parse_rodata(char *arg) -{ - int ret = strtobool(arg, &rodata_enabled); - if (!ret) { - rodata_full = false; - return 0; - } - - /* permit 'full' in addition to boolean options */ - if (strcmp(arg, "full")) - return -EINVAL; - - rodata_enabled = true; - rodata_full = true; - return 0; -} -early_param("rodata", parse_rodata); - #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { diff --git a/init/main.c b/init/main.c index 91642a4e69be..1fe7942f5d4a 100644 --- a/init/main.c +++ b/init/main.c @@ -1446,13 +1446,25 @@ static noinline void __init kernel_init_freeable(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) bool rodata_enabled __ro_after_init = true; + +#ifndef arch_parse_debug_rodata +static inline bool arch_parse_debug_rodata(char *str) { return false; } +#endif + static int __init set_debug_rodata(char *str) { - if (strtobool(str, &rodata_enabled)) + if (arch_parse_debug_rodata(str)) + return 0; + + if (str && !strcmp(str, "on")) + rodata_enabled = true; + else if (str && !strcmp(str, "off")) + rodata_enabled = false; + else pr_warn("Invalid option string for rodata: '%s'\n", str); - return 1; + return 0; } -__setup("rodata=", set_debug_rodata); +early_param("rodata", set_debug_rodata); #endif #ifdef CONFIG_STRICT_KERNEL_RWX -- cgit v1.2.3 From e89d120c4b720e232cc6a94f0fcbd59c15d41489 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Fri, 19 Aug 2022 11:30:50 +0100 Subject: arm64: errata: add detection for AMEVCNTR01 incrementing incorrectly The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate as the system counter. On affected Cortex-A510 cores, AMEVCNTR01 increments incorrectly giving a significantly higher output value. This results in inaccurate task scheduler utilization tracking and incorrect feedback on CPU frequency. Work around this problem by returning 0 when reading the affected counter in key locations that results in disabling all users of this counter from using it either for frequency invariance or as FFH reference counter. This effect is the same to firmware disabling affected counters. Details on how the two features are affected by this erratum: - AMU counters will not be used for frequency invariance for affected CPUs and CPUs in the same cpufreq policy. AMUs can still be used for frequency invariance for unaffected CPUs in the system. Although unlikely, if no alternative method can be found to support frequency invariance for affected CPUs (cpufreq based or solution based on platform counters) frequency invariance will be disabled. Please check the chapter on frequency invariance at Documentation/scheduler/sched-capacity.rst for details of its effect. - Given that FFH can be used to fetch either the core or constant counter values, restrictions are lifted regarding any of these counters returning a valid (!0) value. Therefore FFH is considered supported if there is a least one CPU that support AMUs, independent of any counters being disabled or affected by this erratum. Clarifying comments are now added to the cpc_ffh_supported(), cpu_read_constcnt() and cpu_read_corecnt() functions. The above is achieved through adding a new erratum: ARM64_ERRATUM_2457168. Signed-off-by: Ionela Voinescu Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: James Morse Link: https://lore.kernel.org/r/20220819103050.24211-1-ionela.voinescu@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 17 +++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 10 ++++++++++ arch/arm64/kernel/cpufeature.c | 5 ++++- arch/arm64/kernel/topology.c | 32 ++++++++++++++++++++++++++++++-- arch/arm64/tools/cpucaps | 1 + 6 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 33b04db8408f..fda97b3fcf01 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,8 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 571cc234d0b3..9fb9fff08c94 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -917,6 +917,23 @@ config ARM64_ERRATUM_1902691 If unsure, say Y. +config ARM64_ERRATUM_2457168 + bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly" + depends on ARM64_AMU_EXTN + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2457168. + + The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate + as the system counter. On affected Cortex-A510 cores AMEVCNTR01 increments + incorrectly giving a significantly higher output value. + + Work around this problem by returning 0 when reading the affected counter in + key locations that results in disabling all users of this counter. This effect + is the same to firmware disabling affected counters. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0f7e9087d900..53b973b6059f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -656,6 +656,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2457168 + { + .desc = "ARM erratum 2457168", + .capability = ARM64_WORKAROUND_2457168, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + + /* Cortex-A510 r0p0-r1p1 */ + CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1) + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2038923 { .desc = "ARM erratum 2038923", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 907401e4fffb..af4de817d712 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1870,7 +1870,10 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", smp_processor_id()); cpumask_set_cpu(smp_processor_id(), &amu_cpus); - update_freq_counters_refs(); + + /* 0 reference values signal broken/disabled counters */ + if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168)) + update_freq_counters_refs(); } } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 869ffc4d4484..ad2bfc794257 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -296,12 +296,25 @@ core_initcall(init_amu_fie); static void cpu_read_corecnt(void *val) { + /* + * A value of 0 can be returned if the current CPU does not support AMUs + * or if the counter is disabled for this CPU. A return value of 0 at + * counter read is properly handled as an error case by the users of the + * counter. + */ *(u64 *)val = read_corecnt(); } static void cpu_read_constcnt(void *val) { - *(u64 *)val = read_constcnt(); + /* + * Return 0 if the current CPU is affected by erratum 2457168. A value + * of 0 is also returned if the current CPU does not support AMUs or if + * the counter is disabled. A return value of 0 at counter read is + * properly handled as an error case by the users of the counter. + */ + *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ? + 0UL : read_constcnt(); } static inline @@ -328,7 +341,22 @@ int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) */ bool cpc_ffh_supported(void) { - return freq_counters_valid(get_cpu_with_amu_feat()); + int cpu = get_cpu_with_amu_feat(); + + /* + * FFH is considered supported if there is at least one present CPU that + * supports AMUs. Using FFH to read core and reference counters for CPUs + * that do not support AMUs, have counters disabled or that are affected + * by errata, will result in a return value of 0. + * + * This is done to allow any enabled and valid counters to be read + * through FFH, knowing that potentially returning 0 as counter value is + * properly handled by the users of these counters. + */ + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) + return false; + + return true; } int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 779653771507..63b2484ce6c3 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -67,6 +67,7 @@ WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 +WORKAROUND_2457168 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE -- cgit v1.2.3 From e75d18cecbb3805895d8ed64da4f78575ec96043 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 8 Aug 2022 09:46:40 +0100 Subject: arm64: cacheinfo: Fix incorrect assignment of signed error value to unsigned fw_level Though acpi_find_last_cache_level() always returned signed value and the document states it will return any errors caused by lack of a PPTT table, it never returned negative values before. Commit 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage") however changed it by returning -ENOENT if no PPTT was found. The value returned from acpi_find_last_cache_level() is then assigned to unsigned fw_level. It will result in the number of cache leaves calculated incorrectly as a huge value which will then cause the following warning from __alloc_pages as the order would be great than MAX_ORDER because of incorrect and huge cache leaves value. | WARNING: CPU: 0 PID: 1 at mm/page_alloc.c:5407 __alloc_pages+0x74/0x314 | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-10393-g7c2a8d3ac4c0 #73 | pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __alloc_pages+0x74/0x314 | lr : alloc_pages+0xe8/0x318 | Call trace: | __alloc_pages+0x74/0x314 | alloc_pages+0xe8/0x318 | kmalloc_order_trace+0x68/0x1dc | __kmalloc+0x240/0x338 | detect_cache_attributes+0xe0/0x56c | update_siblings_masks+0x38/0x284 | store_cpu_topology+0x78/0x84 | smp_prepare_cpus+0x48/0x134 | kernel_init_freeable+0xc4/0x14c | kernel_init+0x2c/0x1b4 | ret_from_fork+0x10/0x20 Fix the same by changing fw_level to be signed integer and return the error from init_cache_level() early in case of error. Reported-and-Tested-by: Bruno Goncalves Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20220808084640.3165368-1-sudeep.holla@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cacheinfo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 587543c6c51c..97c42be71338 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -45,7 +45,8 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, fw_level; + unsigned int ctype, level, leaves; + int fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -63,6 +64,9 @@ int init_cache_level(unsigned int cpu) else fw_level = acpi_find_last_cache_level(cpu); + if (fw_level < 0) + return fw_level; + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 -- cgit v1.2.3 From 02e483f8d414cc4f467389843d61b63bbbbcd892 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:11 +0100 Subject: arm64/sysreg: Directly include bitfield.h The SYS_FIELD_ macros in sysreg.h use definitions from bitfield.h but there is no direct inclusion of it, add one to ensure that sysreg.h is directly usable. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7c71358d44c4..c67002677015 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1116,6 +1116,7 @@ #else +#include #include #include #include -- cgit v1.2.3 From a10edea4efbba4e8756f493781e953dd3592f24a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:12 +0100 Subject: arm64/sysreg: Guard SYS_FIELD_ macros for asm The SYS_FIELD_ macros are not safe for assembly contexts, move them inside the guarded section. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c67002677015..818df938a7ad 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1210,8 +1210,6 @@ par; \ }) -#endif - #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -1221,4 +1219,6 @@ #define SYS_FIELD_PREP_ENUM(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) +#endif + #endif /* __ASM_SYSREG_H */ -- cgit v1.2.3 From 53d2d84a1f6d68e036adab71df8e0f37cd2724f6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:13 +0100 Subject: arm64/cache: Fix cache_type_cwg() for register generation Ard noticed that since we converted CTR_EL0 to automatic generation we have been seeing errors on some systems handling the value of cache_type_cwg() such as CPU features: No Cache Writeback Granule information, assuming 128 This is because the manual definition of CTR_EL0_CWG_MASK was done without a shift while our convention is to define the mask after shifting. This means that the user in cache_type_cwg() was broken as it was written for the manually written shift then mask. Fix this by converting to use SYS_FIELD_GET(). The only other field where the _MASK for this register is used is IminLine which is at offset 0 so unaffected. Fixes: 9a3634d02301 ("arm64/sysreg: Convert CTR_EL0 to automatic generation") Reported-by: Ard Biesheuvel Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ca9b487112cc..34256bda0da9 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -71,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; + return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype()); } #define __read_mostly __section(".data..read_mostly") -- cgit v1.2.3 From 7ddcaf78e93c9282b4d92184f511b4d5bee75355 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:21 +0100 Subject: arm64/signal: Raise limit on stack frames The signal code has a limit of 64K on the size of a stack frame that it will generate, if this limit is exceeded then a process will be killed if it receives a signal. Unfortunately with the advent of SME this limit is too small - the maximum possible size of the ZA register alone is 64K. This is not an issue for practical systems at present but is easily seen using virtual platforms. Raise the limit to 256K, this is substantially more than could be used by any current architecture extension. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3e6d0352d7d3..0685b8784927 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -91,7 +91,7 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) * not taken into account. This limit is not a guarantee and is * NOT ABI. */ -#define SIGFRAME_MAXSZ SZ_64K +#define SIGFRAME_MAXSZ SZ_256K static int __sigframe_alloc(struct rt_sigframe_user_layout *user, unsigned long *offset, size_t size, bool extend) -- cgit v1.2.3 From ea64baacbc36a0d552aec0d87107182f40211131 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:22 +0100 Subject: arm64/signal: Flush FPSIMD register state when disabling streaming mode When handling a signal delivered to a context with streaming mode enabled we will disable streaming mode for the signal handler, when doing so we should also flush the saved FPSIMD register state like exiting streaming mode in the hardware would do so that if that state is reloaded we get the same behaviour. Without this we will reload whatever the last FPSIMD state that was saved for the task was. Fixes: 40a8e87bb328 ("arm64/sme: Disable ZA and streaming mode when handling signals") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0685b8784927..0d28a783a69a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -926,6 +926,16 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { + /* + * If we were in streaming mode the saved register + * state was SVE but we will exit SM and use the + * FPSIMD register state - flush the saved FPSIMD + * register state in case it gets loaded. + */ + if (current->thread.svcr & SVCR_SM_MASK) + memset(¤t->thread.uw.fpsimd_state, 0, + sizeof(current->thread.uw.fpsimd_state)); + current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); sme_smstop(); -- cgit v1.2.3 From 826a4fdd2ada9e5923c58bdd168f31a42e958ffc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:23 +0100 Subject: arm64/sme: Don't flush SVE register state when allocating SME storage Currently when taking a SME access trap we allocate storage for the SVE register state in order to be able to handle storage of streaming mode SVE. Due to the original usage in a purely SVE context the SVE register state allocation this also flushes the register state for SVE if storage was already allocated but in the SME context this is not desirable. For a SME access trap to be taken the task must not be in streaming mode so either there already is SVE register state present for regular SVE mode which would be corrupted or the task does not have TIF_SVE and the flush is redundant. Fix this by adding a flag to sve_alloc() indicating if we are in a SVE context and need to flush the state. Freshly allocated storage is always zeroed either way. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 4 ++-- arch/arm64/kernel/fpsimd.c | 10 ++++++---- arch/arm64/kernel/ptrace.c | 6 +++--- arch/arm64/kernel/signal.c | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9bb1873f5295..6f86b7ab6c28 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -153,7 +153,7 @@ struct vl_info { #ifdef CONFIG_ARM64_SVE -extern void sve_alloc(struct task_struct *task); +extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); extern void fpsimd_force_sync_to_sve(struct task_struct *task); @@ -256,7 +256,7 @@ size_t sve_state_size(struct task_struct const *task); #else /* ! CONFIG_ARM64_SVE */ -static inline void sve_alloc(struct task_struct *task) { } +static inline void sve_alloc(struct task_struct *task, bool flush) { } static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dd63ffc3a2fa..b9ae9827e6e8 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -715,10 +715,12 @@ size_t sve_state_size(struct task_struct const *task) * do_sve_acc() case, there is no ABI requirement to hide stale data * written previously be task. */ -void sve_alloc(struct task_struct *task) +void sve_alloc(struct task_struct *task, bool flush) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(task)); + if (flush) + memset(task->thread.sve_state, 0, + sve_state_size(task)); return; } @@ -1388,7 +1390,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { force_sig(SIGKILL); return; @@ -1439,7 +1441,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, false); sme_alloc(current); if (!current->thread.sve_state || !current->thread.za_state) { force_sig(SIGKILL); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 21da83187a60..eb7c08dfb834 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -882,7 +882,7 @@ static int sve_set_common(struct task_struct *target, * state and ensure there's storage. */ if (target->thread.svcr != old_svcr) - sve_alloc(target); + sve_alloc(target, true); } /* Registers: FPSIMD-only case */ @@ -912,7 +912,7 @@ static int sve_set_common(struct task_struct *target, goto out; } - sve_alloc(target); + sve_alloc(target, true); if (!target->thread.sve_state) { ret = -ENOMEM; clear_tsk_thread_flag(target, TIF_SVE); @@ -1082,7 +1082,7 @@ static int za_set(struct task_struct *target, /* Ensure there is some SVE storage for streaming mode */ if (!target->thread.sve_state) { - sve_alloc(target); + sve_alloc(target, false); if (!target->thread.sve_state) { clear_thread_flag(TIF_SME); ret = -ENOMEM; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0d28a783a69a..9ad911f1647c 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -310,7 +310,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); return -ENOMEM; -- cgit v1.2.3 From 714f3cbd70a4db9f9b7fe5b8a032896ed33fb824 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:24 +0100 Subject: arm64/sme: Don't flush SVE register state when handling SME traps Currently as part of handling a SME access trap we flush the SVE register state. This is not needed and would corrupt register state if the task has access to the SVE registers already. For non-streaming mode accesses the required flushing will be done in the SVE access trap. For streaming mode SVE register accesses the architecture guarantees that the register state will be flushed when streaming mode is entered or exited so there is no need for us to do so. Simply remove the register initialisation. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b9ae9827e6e8..23834d96d1e7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1462,17 +1462,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) fpsimd_bind_task_to_cpu(); } - /* - * If SVE was not already active initialise the SVE registers, - * any non-shared state between the streaming and regular SVE - * registers is architecturally guaranteed to be zeroed when - * we enter streaming mode. We do not need to initialize ZA - * since ZA must be disabled at this point and enabling ZA is - * architecturally defined to zero ZA. - */ - if (system_supports_sve() && !test_thread_flag(TIF_SVE)) - sve_init_regs(); - put_cpu_fpsimd_context(); } -- cgit v1.2.3 From c2fa700c5f7667800b6cfedcb9994eb5eb69e6cc Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:48 +0700 Subject: MAINTAINERS: Add `include/linux/io_uring_types.h` File include/linux/io_uring_types.h doesn't have a maintainer, add it to the io_uring section. Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff9..6dc55be41420 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10657,6 +10657,7 @@ T: git git://git.kernel.dk/linux-block T: git git://git.kernel.dk/liburing F: io_uring/ F: include/linux/io_uring.h +F: include/linux/io_uring_types.h F: include/uapi/linux/io_uring.h F: tools/io_uring/ -- cgit v1.2.3 From e1d0c6d05afdcff01ace698edb3b8808db1dc066 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:49 +0700 Subject: io_uring: uapi: Add `extern "C"` in io_uring.h for liburing Make it easy for liburing to integrate uapi header with the kernel. Previously, when this header changes, the liburing side can't directly copy this header file due to some small differences. Sync them. Link: https://lore.kernel.org/io-uring/f1feef16-6ea2-0653-238f-4aaee35060b6@kernel.dk Cc: Bart Van Assche Cc: Dylan Yudaken Cc: Facebook Kernel Team Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1463cfecb56b..9e0b5c8d92ce 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -12,6 +12,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* * IO submission data structure (Submission Queue Entry) */ @@ -661,4 +665,8 @@ struct io_uring_recvmsg_out { __u32 flags; }; +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From 47abea041f897d64dbd5777f0cf7745148f85d75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Aug 2022 07:26:08 -0600 Subject: io_uring: fix off-by-one in sync cancelation file check The passed in index should be validated against the number of registered files we have, it needs to be smaller than the index value to avoid going one beyond the end. Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()") Reported-by: Luo Likang Signed-off-by: Jens Axboe --- io_uring/cancel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index e4e1dc0325f0..5fc5d3e80fcb 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -218,7 +218,7 @@ static int __io_sync_cancel(struct io_uring_task *tctx, (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) { unsigned long file_ptr; - if (unlikely(fd > ctx->nr_user_files)) + if (unlikely(fd >= ctx->nr_user_files)) return -EBADF; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; -- cgit v1.2.3 From 22dec134dbfa825b963f8a1807ad19b943e46a56 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2022 09:27:16 +0200 Subject: ALSA: seq: oss: Fix data-race for max_midi_devs access ALSA OSS sequencer refers to a global variable max_midi_devs at creating a new port, storing it to its own field. Meanwhile this variable may be changed by other sequencer events at snd_seq_oss_midi_check_exit_port() in parallel, which may cause a data race. OTOH, this data race itself is almost harmless, as the access to the MIDI device is done via get_mdev() and it's protected with a refcount, hence its presence is guaranteed. Though, it's sill better to address the data-race from the code sanity POV, and this patch adds the proper spinlock for the protection. Reported-by: Abhishek Shah Cc: Link: https://lore.kernel.org/r/CAEHB2493pZRXs863w58QWnUTtv3HHfg85aYhLn5HJHCwxqtHQg@mail.gmail.com Link: https://lore.kernel.org/r/20220823072717.1706-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_midi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 1e3bf086f867..07efb38f58ac 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -270,7 +270,9 @@ snd_seq_oss_midi_clear_all(void) void snd_seq_oss_midi_setup(struct seq_oss_devinfo *dp) { + spin_lock_irq(®ister_lock); dp->max_mididev = max_midi_devs; + spin_unlock_irq(®ister_lock); } /* -- cgit v1.2.3 From a9c3eda7eada94e8cf29cb102aa80e1370d8fa2e Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 23 Aug 2022 20:40:22 +0530 Subject: io_uring: fix submission-failure handling for uring-cmd If ->uring_cmd returned an error value different from -EAGAIN or -EIOCBQUEUED, it gets overridden with IOU_OK. This invites trouble as caller (io_uring core code) handles IOU_OK differently than other error codes. Fix this by returning the actual error code. Signed-off-by: Kanchan Joshi Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8e0cc2d9205e..b9989ae7b957 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -112,7 +112,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); - return IOU_OK; + return ret; } return IOU_ISSUE_SKIP_COMPLETE; -- cgit v1.2.3 From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 19 Aug 2022 22:09:28 +0200 Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf It is a bit unlcear to us why that's helping, but it does and unbreaks suspend/resume on a lot of GPUs without any known drawbacks. Cc: stable@vger.kernel.org # v5.15+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156 Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 05076e530e7d..e29175e4b44c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, if (ret == 0) { ret = nouveau_fence_new(chan, false, &fence); if (ret == 0) { + /* TODO: figure out a better solution here + * + * wait on the fence here explicitly as going through + * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. + * + * Without this the operation can timeout and we'll fallback to a + * software copy, which might take several minutes to finish. + */ + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, -- cgit v1.2.3 From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001 From: Riwen Lu Date: Tue, 23 Aug 2022 15:43:42 +0800 Subject: ACPI: processor: Remove freq Qos request for all CPUs The freq Qos request would be removed repeatedly if the cpufreq policy relates to more than one CPU. Then, it would cause the "called for unknown object" warning. Remove the freq Qos request for each CPU relates to the cpufreq policy, instead of removing repeatedly for the last CPU of it. Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs") Reported-by: Jeremy Linton Tested-by: Jeremy Linton Signed-off-by: Riwen Lu Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index db6ac540e924..e534fd49a67e 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) unsigned int cpu; for_each_cpu(cpu, policy->related_cpus) { - struct acpi_processor *pr = per_cpu(processors, policy->cpu); + struct acpi_processor *pr = per_cpu(processors, cpu); if (pr) freq_qos_remove_request(&pr->thermal_req); -- cgit v1.2.3 From 7931e28098a4c1a2a6802510b0cbe57546d2049d Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Mon, 8 Aug 2022 21:21:58 +0800 Subject: thermal/int340x_thermal: handle data_vault when the value is ZERO_SIZE_PTR In some case, the GDDV returns a package with a buffer which has zero length. It causes that kmemdup() returns ZERO_SIZE_PTR (0x10). Then the data_vault_read() got NULL point dereference problem when accessing the 0x10 value in data_vault. [ 71.024560] BUG: kernel NULL pointer dereference, address: 0000000000000010 This patch uses ZERO_OR_NULL_PTR() for checking ZERO_SIZE_PTR or NULL value in data_vault. Signed-off-by: "Lee, Chun-Yi" Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 80d4e0676083..365489bf4b8c 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -527,7 +527,7 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); - if (!priv->data_vault) + if (ZERO_OR_NULL_PTR(priv->data_vault)) goto out_free; bin_attr_data_vault.private = priv->data_vault; @@ -597,7 +597,7 @@ static int int3400_thermal_probe(struct platform_device *pdev) goto free_imok; } - if (priv->data_vault) { + if (!ZERO_OR_NULL_PTR(priv->data_vault)) { result = sysfs_create_group(&pdev->dev.kobj, &data_attribute_group); if (result) @@ -615,7 +615,8 @@ static int int3400_thermal_probe(struct platform_device *pdev) free_sysfs: cleanup_odvp(priv); if (priv->data_vault) { - sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); + if (!ZERO_OR_NULL_PTR(priv->data_vault)) + sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); kfree(priv->data_vault); } free_uuid: @@ -647,7 +648,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) if (!priv->rel_misc_dev_res) acpi_thermal_rel_misc_device_remove(priv->adev->handle); - if (priv->data_vault) + if (!ZERO_OR_NULL_PTR(priv->data_vault)) sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group); -- cgit v1.2.3 From ea902bcc1943f7539200ec464de3f54335588774 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 23 Aug 2022 10:48:19 -0700 Subject: x86/cpu: Add new Raptor Lake CPU model number Note1: Model 0xB7 already claimed the "no suffix" #define for a regular client part, so add (yet another) suffix "S" to distinguish this new part from the earlier one. Note2: the RAPTORLAKE* and ALDERLAKE* processors are very similar from a software enabling point of view. There are no known features that have model-specific enabling and also differ between the two. In other words, every single place that list *one* or more RAPTORLAKE* or ALDERLAKE* processors should list all of them. Note3: This is being merged before there is an in-tree user. Merging this provides an "anchor" so that the different folks can update their subsystems (like perf) in parallel to use this define and test it. [ dhansen: add a note about why this has no in-tree users yet ] Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/20220823174819.223941-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index def6ca121111..aeb38023a703 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -27,6 +27,7 @@ * _X - regular server parts * _D - micro server parts * _N,_P - other mobile parts + * _S - other client parts * * Historical OPTDIFFs: * @@ -112,6 +113,7 @@ #define INTEL_FAM6_RAPTORLAKE 0xB7 #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_FAM6_RAPTORLAKE_S 0xBF /* "Small Core" Processors (Atom) */ -- cgit v1.2.3 From 6ca7076fbfaeccce173aeab832d76b9e49e1034b Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 16 Aug 2022 13:01:57 +0100 Subject: cpufreq: check only freq_table in __resolve_freq() There is no need to check if the cpufreq driver implements callback cpufreq_driver::target_index. The logic in the __resolve_freq uses the frequency table available in the policy. It doesn't matter if the driver provides 'target_index' or 'target' callback. It just has to populate the 'policy->freq_table'. Thus, check only frequency table during the frequency resolving call. Acked-by: Viresh Kumar Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7820c4e74289..69b3d61852ac 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -532,7 +532,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!cpufreq_driver->target_index) + if (!policy->freq_table) return target_freq; idx = cpufreq_frequency_table_target(policy, target_freq, relation); -- cgit v1.2.3 From ced8ecf026fd8084cf175530ff85c76d6085d715 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 23 Aug 2022 11:28:13 -0700 Subject: btrfs: fix space cache corruption and potential double allocations When testing space_cache v2 on a large set of machines, we encountered a few symptoms: 1. "unable to add free space :-17" (EEXIST) errors. 2. Missing free space info items, sometimes caught with a "missing free space info for X" error. 3. Double-accounted space: ranges that were allocated in the extent tree and also marked as free in the free space tree, ranges that were marked as allocated twice in the extent tree, or ranges that were marked as free twice in the free space tree. If the latter made it onto disk, the next reboot would hit the BUG_ON() in add_new_free_space(). 4. On some hosts with no on-disk corruption or error messages, the in-memory space cache (dumped with drgn) disagreed with the free space tree. All of these symptoms have the same underlying cause: a race between caching the free space for a block group and returning free space to the in-memory space cache for pinned extents causes us to double-add a free range to the space cache. This race exists when free space is cached from the free space tree (space_cache=v2) or the extent tree (nospace_cache, or space_cache=v1 if the cache needs to be regenerated). struct btrfs_block_group::last_byte_to_unpin and struct btrfs_block_group::progress are supposed to protect against this race, but commit d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit") subtly broke this by allowing multiple transactions to be unpinning extents at the same time. Specifically, the race is as follows: 1. An extent is deleted from an uncached block group in transaction A. 2. btrfs_commit_transaction() is called for transaction A. 3. btrfs_run_delayed_refs() -> __btrfs_free_extent() runs the delayed ref for the deleted extent. 4. __btrfs_free_extent() -> do_free_extent_accounting() -> add_to_free_space_tree() adds the deleted extent back to the free space tree. 5. do_free_extent_accounting() -> btrfs_update_block_group() -> btrfs_cache_block_group() queues up the block group to get cached. block_group->progress is set to block_group->start. 6. btrfs_commit_transaction() for transaction A calls switch_commit_roots(). It sets block_group->last_byte_to_unpin to block_group->progress, which is block_group->start because the block group hasn't been cached yet. 7. The caching thread gets to our block group. Since the commit roots were already switched, load_free_space_tree() sees the deleted extent as free and adds it to the space cache. It finishes caching and sets block_group->progress to U64_MAX. 8. btrfs_commit_transaction() advances transaction A to TRANS_STATE_SUPER_COMMITTED. 9. fsync calls btrfs_commit_transaction() for transaction B. Since transaction A is already in TRANS_STATE_SUPER_COMMITTED and the commit is for fsync, it advances. 10. btrfs_commit_transaction() for transaction B calls switch_commit_roots(). This time, the block group has already been cached, so it sets block_group->last_byte_to_unpin to U64_MAX. 11. btrfs_commit_transaction() for transaction A calls btrfs_finish_extent_commit(), which calls unpin_extent_range() for the deleted extent. It sees last_byte_to_unpin set to U64_MAX (by transaction B!), so it adds the deleted extent to the space cache again! This explains all of our symptoms above: * If the sequence of events is exactly as described above, when the free space is re-added in step 11, it will fail with EEXIST. * If another thread reallocates the deleted extent in between steps 7 and 11, then step 11 will silently re-add that space to the space cache as free even though it is actually allocated. Then, if that space is allocated *again*, the free space tree will be corrupted (namely, the wrong item will be deleted). * If we don't catch this free space tree corruption, it will continue to get worse as extents are deleted and reallocated. The v1 space_cache is synchronously loaded when an extent is deleted (btrfs_update_block_group() with alloc=0 calls btrfs_cache_block_group() with load_cache_only=1), so it is not normally affected by this bug. However, as noted above, if we fail to load the space cache, we will fall back to caching from the extent tree and may hit this bug. The easiest fix for this race is to also make caching from the free space tree or extent tree synchronous. Josef tested this and found no performance regressions. A few extra changes fall out of this change. Namely, this fix does the following, with step 2 being the crucial fix: 1. Factor btrfs_caching_ctl_wait_done() out of btrfs_wait_block_group_cache_done() to allow waiting on a caching_ctl that we already hold a reference to. 2. Change the call in btrfs_cache_block_group() of btrfs_wait_space_cache_v1_finished() to btrfs_caching_ctl_wait_done(), which makes us wait regardless of the space_cache option. 3. Delete the now unused btrfs_wait_space_cache_v1_finished() and space_cache_v1_done(). 4. Change btrfs_cache_block_group()'s `int load_cache_only` parameter to `bool wait` to more accurately describe its new meaning. 5. Change a few callers which had a separate call to btrfs_wait_block_group_cache_done() to use wait = true instead. 6. Make btrfs_wait_block_group_cache_done() static now that it's not used outside of block-group.c anymore. Fixes: d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit") CC: stable@vger.kernel.org # 5.12+ Reviewed-by: Filipe Manana Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 47 +++++++++++++++-------------------------------- fs/btrfs/block-group.h | 4 +--- fs/btrfs/ctree.h | 1 - fs/btrfs/extent-tree.c | 30 ++++++------------------------ 4 files changed, 22 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 993aca2f1e18..e0375ba9d0fe 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, btrfs_put_caching_control(caching_ctl); } -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) +static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, + struct btrfs_caching_control *caching_ctl) +{ + wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); + return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; +} + +static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) { struct btrfs_caching_control *caching_ctl; - int ret = 0; + int ret; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; - - wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); - if (cache->cached == BTRFS_CACHE_ERROR) - ret = -EIO; + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); btrfs_put_caching_control(caching_ctl); return ret; } -static bool space_cache_v1_done(struct btrfs_block_group *cache) -{ - bool ret; - - spin_lock(&cache->lock); - ret = cache->cached != BTRFS_CACHE_FAST; - spin_unlock(&cache->lock); - - return ret; -} - -void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, - struct btrfs_caching_control *caching_ctl) -{ - wait_event(caching_ctl->wait, space_cache_v1_done(cache)); -} - #ifdef CONFIG_BTRFS_DEBUG static void fragment_free_space(struct btrfs_block_group *block_group) { @@ -750,9 +737,8 @@ done: btrfs_put_block_group(block_group); } -int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) { - DEFINE_WAIT(wait); struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl = NULL; int ret = 0; @@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only } WARN_ON(cache->caching_ctl); cache->caching_ctl = caching_ctl; - if (btrfs_test_opt(fs_info, SPACE_CACHE)) - cache->cached = BTRFS_CACHE_FAST; - else - cache->cached = BTRFS_CACHE_STARTED; + cache->cached = BTRFS_CACHE_STARTED; cache->has_caching_ctl = 1; spin_unlock(&cache->lock); @@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - if (load_cache_only && caching_ctl) - btrfs_wait_space_cache_v1_finished(cache, caching_ctl); + if (wait && caching_ctl) + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); if (caching_ctl) btrfs_put_caching_control(caching_ctl); @@ -3312,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, * space back to the block group, otherwise we will leak space. */ if (!alloc && !btrfs_block_group_done(cache)) - btrfs_cache_block_group(cache, 1); + btrfs_cache_block_group(cache, true); byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 35e0e860cc0b..6b3cdc4cbc41 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes); -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); -int btrfs_cache_block_group(struct btrfs_block_group *cache, - int load_cache_only); +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4edb4bfb2166..9ef162dbd4bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,6 @@ struct btrfs_free_cluster { enum btrfs_caching_type { BTRFS_CACHE_NO, BTRFS_CACHE_STARTED, - BTRFS_CACHE_FAST, BTRFS_CACHE_FINISHED, BTRFS_CACHE_ERROR, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ab944d1f94ef..6914cd8024ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, return -EINVAL; /* - * pull in the free space cache (if any) so that our pin - * removes the free space from the cache. We have load_only set - * to one because the slow code to read in the free extents does check - * the pinned extents. + * Fully cache the free space first so that our pin removes the free space + * from the cache. */ - btrfs_cache_block_group(cache, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) goto out; @@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, if (!block_group) return -EINVAL; - btrfs_cache_block_group(block_group, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(block_group); + ret = btrfs_cache_block_group(block_group, true); if (ret) goto out; @@ -4399,7 +4387,7 @@ have_block_group: ffe_ctl->cached = btrfs_block_group_done(block_group); if (unlikely(!ffe_ctl->cached)) { ffe_ctl->have_caching_bg = true; - ret = btrfs_cache_block_group(block_group, 0); + ret = btrfs_cache_block_group(block_group, false); /* * If we get ENOMEM here or something else we want to @@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (end - start >= range->minlen) { if (!btrfs_block_group_done(cache)) { - ret = btrfs_cache_block_group(cache, 0); - if (ret) { - bg_failed++; - bg_ret = ret; - continue; - } - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) { bg_failed++; bg_ret = ret; -- cgit v1.2.3 From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Aug 2022 15:47:09 +0100 Subject: btrfs: fix silent failure when deleting root reference At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end up returning from the function with a value of 0 (success). This happens because the function returns the value stored in the variable 'err', which is 0, while the error value we got from btrfs_search_slot() is stored in the 'ret' variable. So fix it by setting 'err' with the error value. Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/root-tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a64b26b16904..d647cb2938c0 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - if (ret < 0) + if (ret < 0) { + err = ret; goto out; - if (ret == 0) { + } else if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); -- cgit v1.2.3 From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Aug 2022 18:32:18 +0800 Subject: btrfs: replace: drop assert for suspended replace If the filesystem mounts with the replace-operation in a suspended state and try to cancel the suspended replace-operation, we hit the assert. The assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") that was actually not required. So just remove it. $ mount /dev/sda5 /btrfs BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded' $ mount -o degraded /dev/sda5 /btrfs <-- success. $ btrfs replace cancel /btrfs kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131 kernel: ------------[ cut here ]------------ kernel: kernel BUG at fs/btrfs/ctree.h:3750! After the patch: $ btrfs replace cancel /btrfs BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to canceled Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") CC: stable@vger.kernel.org # 5.0+ Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index f43196a893ca..b1cddef87cd3 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) up_write(&dev_replace->rwsem); /* Scrub for replace must not be running in suspended state */ - ret = btrfs_scrub_cancel(fs_info); - ASSERT(ret != -ENOTCONN); + btrfs_scrub_cancel(fs_info); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { -- cgit v1.2.3 From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Aug 2022 18:32:19 +0800 Subject: btrfs: add info when mount fails due to stale replace target If the replace target device reappears after the suspended replace is cancelled, it blocks the mount operation as it can't find the matching replace-item in the metadata. As shown below, BTRFS error (device sda5): replace devid present without an active replace item To overcome this situation, the user can run the command btrfs device scan --forget and try the mount command again. And also, to avoid repeating the issue, superblock on the devid=0 must be wiped. wipefs -a device-path-to-devid=0. This patch adds some info when this situation occurs. Reported-by: Samuel Greiner Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/ CC: stable@vger.kernel.org # 5.0+ Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index b1cddef87cd3..41cddd3ff059 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found: */ if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, - "replace devid present without an active replace item"); +"replace without active item, run 'device scan --forget' on the target device"); ret = -EUCLEAN; } else { dev_replace->srcdev = NULL; -- cgit v1.2.3 From 3f67e69976035352db110443916bcce32c7f64ac Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:13 +0100 Subject: riscv: dts: microchip: mpfs: fix incorrect pcie child node name Recent versions of dt-schema complain about the PCIe controller's child node name: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: pcie@2000000000: Unevaluated properties are not allowed ('clock-names', 'clocks', 'legacy-interrupt-controller', 'microchip,axi-m-atr0' were unexpected) From schema: Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml Make the dts match the correct property name in the dts. Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 0a6ad5b9ff8d..b26fc7886e5d 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -487,7 +487,7 @@ msi-controller; microchip,axi-m-atr0 = <0x10 0x0>; status = "disabled"; - pcie_intc: legacy-interrupt-controller { + pcie_intc: interrupt-controller { #address-cells = <0>; #interrupt-cells = <1>; interrupt-controller; -- cgit v1.2.3 From 72a05748cbd285567d69f173f8694e3471b79f20 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:14 +0100 Subject: riscv: dts: microchip: mpfs: remove ti,fifo-depth property Recent versions of dt-schema warn about a previously undetected undocument property on the icicle & polarberry devicetrees: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: ethernet@20112000: ethernet-phy@8: Unevaluated properties are not allowed ('ti,fifo-depth' was unexpected) From schema: Documentation/devicetree/bindings/net/cdns,macb.yaml I know what you're thinking, the binding doesn't look to be the problem and I agree. I am not sure why a TI vendor property was ever actually added since it has no meaning... just get rid of it. Fixes: bc47b2217f24 ("riscv: dts: microchip: add the sundance polarberry") Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 2 -- arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 044982a11df5..ee548ab61a2a 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -84,12 +84,10 @@ phy1: ethernet-phy@9 { reg = <9>; - ti,fifo-depth = <0x1>; }; phy0: ethernet-phy@8 { reg = <8>; - ti,fifo-depth = <0x1>; }; }; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index 82c93c8f5c17..dc11bb8fc833 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -54,12 +54,10 @@ phy1: ethernet-phy@5 { reg = <5>; - ti,fifo-depth = <0x01>; }; phy0: ethernet-phy@4 { reg = <4>; - ti,fifo-depth = <0x01>; }; }; -- cgit v1.2.3 From 2b55915d27dcaa35f54bad7925af0a76001079bc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:15 +0100 Subject: riscv: dts: microchip: mpfs: remove bogus card-detect-delay Recent versions of dt-schema warn about a previously undetected undocumented property: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: mmc@20008000: Unevaluated properties are not allowed ('card-detect-delay' was unexpected) From schema: Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml There are no GPIOs connected to MSSIO6B4 pin K3 so adding the common cd-debounce-delay-ms property makes no sense. The Cadence IP has a register that sets the card detect delay as "DP * tclk". On MPFS, this clock frequency is not configurable (it must be 200 MHz) & the FPGA comes out of reset with this register already set. Fixes: bc47b2217f24 ("riscv: dts: microchip: add the sundance polarberry") Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 1 - arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index ee548ab61a2a..f3f87ed2007f 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -100,7 +100,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index dc11bb8fc833..c87cc2d8fe29 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -70,7 +70,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; -- cgit v1.2.3 From e4009c5fa77b4356aa37ce002e9f9952dfd7a615 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:16 +0100 Subject: riscv: dts: microchip: mpfs: remove pci axi address translation property An AXI master address translation table property was inadvertently added to the device tree & this was not caught by dtbs_check at the time. Remove the property - it should not be in mpfs.dtsi anyway as it would be more suitable in -fabric.dtsi nor does it actually apply to the version of the reference design we are using for upstream. Link: https://www.microsemi.com/document-portal/doc_download/1245812-polarfire-fpga-and-polarfire-soc-fpga-pci-express-user-guide # Section 1.3.3 Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index b26fc7886e5d..74493344ea41 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -485,7 +485,6 @@ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; msi-parent = <&pcie>; msi-controller; - microchip,axi-m-atr0 = <0x10 0x0>; status = "disabled"; pcie_intc: interrupt-controller { #address-cells = <0>; -- cgit v1.2.3 From 0947ae1121083d363d522ff7518ee72b55bd8d29 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 14:58:04 -0700 Subject: bpf: Fix a data-race around bpf_jit_limit. While reading bpf_jit_limit, it can be changed concurrently via sysctl, WRITE_ONCE() in __do_proc_doulongvec_minmax(). The size of bpf_jit_limit is long, so we need to add a paired READ_ONCE() to avoid load-tearing. Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220823215804.2177-1-kuniyu@amazon.com --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e10d088dbb..3d9eb3ae334c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -971,7 +971,7 @@ pure_initcall(bpf_jit_charge_init); int bpf_jit_charge_modmem(u32 size) { - if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) { + if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) { if (!bpf_capable()) { atomic_long_sub(size, &bpf_jit_current); return -EPERM; -- cgit v1.2.3 From 3e7e04b747adea36f349715d9f0998eeebf15d72 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2022 09:27:17 +0200 Subject: ALSA: seq: Fix data-race at module auto-loading It's been reported that there is a possible data-race accessing to the global card_requested[] array at ALSA sequencer core, which is used for determining whether to call request_module() for the card or not. This data race itself is almost harmless, as it might end up with one extra request_module() call for the already loaded module at most. But it's still better to fix. This patch addresses the possible data race of card_requested[] and client_requested[] arrays by replacing them with bitmask. It's an atomic operation and can work without locks. Reported-by: Abhishek Shah Cc: Link: https://lore.kernel.org/r/CAEHB24_ay6YzARpA1zgCsE7=H9CSJJzux618E=Ka4h0YdKn=qA@mail.gmail.com Link: https://lore.kernel.org/r/20220823072717.1706-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 2e9d695d336c..2d707afa1ef1 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -121,13 +121,13 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES if (!in_interrupt()) { - static char client_requested[SNDRV_SEQ_GLOBAL_CLIENTS]; - static char card_requested[SNDRV_CARDS]; + static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); + static DECLARE_BITMAP(card_requested, SNDRV_CARDS); + if (clientid < SNDRV_SEQ_GLOBAL_CLIENTS) { int idx; - if (!client_requested[clientid]) { - client_requested[clientid] = 1; + if (!test_and_set_bit(clientid, client_requested)) { for (idx = 0; idx < 15; idx++) { if (seq_client_load[idx] < 0) break; @@ -142,10 +142,8 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) int card = (clientid - SNDRV_SEQ_GLOBAL_CLIENTS) / SNDRV_SEQ_CLIENTS_PER_CARD; if (card < snd_ecards_limit) { - if (! card_requested[card]) { - card_requested[card] = 1; + if (!test_and_set_bit(card, card_requested)) snd_request_card(card); - } snd_seq_device_load_drivers(); } } -- cgit v1.2.3 From 2e6481a3f3ee6234ce577454e1d88aca55f51d47 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 23 Aug 2022 15:24:05 +0300 Subject: ALSA: hda: intel-nhlt: Correct the handling of fmt_config flexible array The struct nhlt_format's fmt_config is a flexible array, it must not be used as normal array. When moving to the next nhlt_fmt_cfg we need to take into account the data behind the ->config.caps (indicated by ->config.size). Fixes: a864e8f159b13 ("ALSA: hda: intel-nhlt: verify config type") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Jaska Uimonen Link: https://lore.kernel.org/r/20220823122405.18464-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-nhlt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 9db5ccd9aa2d..13bb0ccfb36c 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -55,16 +55,22 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) /* find max number of channels based on format_configuration */ if (fmt_configs->fmt_count) { + struct nhlt_fmt_cfg *fmt_cfg = fmt_configs->fmt_config; + dev_dbg(dev, "found %d format definitions\n", fmt_configs->fmt_count); for (i = 0; i < fmt_configs->fmt_count; i++) { struct wav_fmt_ext *fmt_ext; - fmt_ext = &fmt_configs->fmt_config[i].fmt_ext; + fmt_ext = &fmt_cfg->fmt_ext; if (fmt_ext->fmt.channels > max_ch) max_ch = fmt_ext->fmt.channels; + + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); } dev_dbg(dev, "max channels found %d\n", max_ch); } else { -- cgit v1.2.3 From 4b1c742407571eff58b6de9881889f7ca7c4b4dc Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 23 Aug 2022 11:07:34 -0500 Subject: x86/boot: Don't propagate uninitialized boot_params->cc_blob_address In some cases, bootloaders will leave boot_params->cc_blob_address uninitialized rather than zeroing it out. This field is only meant to be set by the boot/compressed kernel in order to pass information to the uncompressed kernel when SEV-SNP support is enabled. Therefore, there are no cases where the bootloader-provided values should be treated as anything other than garbage. Otherwise, the uncompressed kernel may attempt to access this bogus address, leading to a crash during early boot. Normally, sanitize_boot_params() would be used to clear out such fields but that happens too late: sev_enable() may have already initialized it to a valid value that should not be zeroed out. Instead, have sev_enable() zero it out unconditionally beforehand. Also ensure this happens for !CONFIG_AMD_MEM_ENCRYPT as well by also including this handling in the sev_enable() stub function. [ bp: Massage commit message and comments. ] Fixes: b190a043c49a ("x86/sev: Add SEV-SNP feature detection/setup") Reported-by: Jeremi Piotrowski Reported-by: watnuss@gmx.de Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=216387 Link: https://lore.kernel.org/r/20220823160734.89036-1-michael.roth@amd.com --- arch/x86/boot/compressed/misc.h | 12 +++++++++++- arch/x86/boot/compressed/sev.c | 8 ++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 4910bf230d7b..62208ec04ca4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr); void snp_set_page_shared(unsigned long paddr); void sev_prep_identity_maps(unsigned long top_level_pgt); #else -static inline void sev_enable(struct boot_params *bp) { } +static inline void sev_enable(struct boot_params *bp) +{ + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 unconditionally (thus here in this stub too) to + * ensure that uninitialized values from buggy bootloaders aren't + * propagated. + */ + if (bp) + bp->cc_blob_address = 0; +} static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) { diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 52f989f6acc2..c93930d5ccbd 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp) struct msr m; bool snp; + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + /* * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. -- cgit v1.2.3 From cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 23 Aug 2022 16:55:51 -0500 Subject: x86/sev: Don't use cc_platform_has() for early SEV-SNP calls When running identity-mapped and depending on the kernel configuration, it is possible that the compiler uses jump tables when generating code for cc_platform_has(). This causes a boot failure because the jump table uses un-mapped kernel virtual addresses, not identity-mapped addresses. This has been seen with CONFIG_RETPOLINE=n. Similar to sme_encrypt_kernel(), use an open-coded direct check for the status of SNP rather than trying to eliminate the jump table. This preserves any code optimization in cc_platform_has() that can be useful post boot. It also limits the changes to SEV-specific files so that future compiler features won't necessarily require possible build changes just because they are not compatible with running identity-mapped. [ bp: Massage commit message. ] Fixes: 5e5ccff60a29 ("x86/sev: Add helper for validating pages in early enc attribute changes") Reported-by: Sean Christopherson Suggested-by: Sean Christopherson Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: # 5.19.x Link: https://lore.kernel.org/all/YqfabnTRxFSM+LoX@google.com/ --- arch/x86/kernel/sev.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 63dc626627a0..4f84c3f11af5 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -701,7 +701,13 @@ e_term: void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ -- cgit v1.2.3 From 6ab55ec0a938c7f943a4edba3d6514f775983887 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Wed, 24 Aug 2022 16:16:54 +0800 Subject: ALSA: control: Fix an out-of-bounds bug in get_ctl_id_hash() Since the user can control the arguments provided to the kernel by the ioctl() system call, an out-of-bounds bug occurs when the 'id->name' provided by the user does not end with '\0'. The following log can reveal it: [ 10.002313] BUG: KASAN: stack-out-of-bounds in snd_ctl_find_id+0x36c/0x3a0 [ 10.002895] Read of size 1 at addr ffff888109f5fe28 by task snd/439 [ 10.004934] Call Trace: [ 10.007140] snd_ctl_find_id+0x36c/0x3a0 [ 10.007489] snd_ctl_ioctl+0x6cf/0x10e0 Fix this by checking the bound of 'id->name' in the loop. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Signed-off-by: Zheyu Ma Link: https://lore.kernel.org/r/20220824081654.3767739-1-zheyuma97@gmail.com Signed-off-by: Takashi Iwai --- sound/core/control.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index f3e893715369..e8fc4c511e5f 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -385,14 +385,14 @@ static bool elem_id_matches(const struct snd_kcontrol *kctl, #define MULTIPLIER 37 static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) { + int i; unsigned long h; - const unsigned char *p; h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; - for (p = id->name; *p; p++) - h = MULTIPLIER * h + *p; + for (i = 0; id->name[i] && i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN; i++) + h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; return h; -- cgit v1.2.3 From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 23 Aug 2022 21:38:10 +0530 Subject: loop: Check for overflow while configuring loop The userspace can configure a loop using an ioctl call, wherein a configuration of type loop_config is passed (see lo_ioctl()'s case on line 1550 of drivers/block/loop.c). This proceeds to call loop_configure() which in turn calls loop_set_status_from_info() (see line 1050 of loop.c), passing &config->info which is of type loop_info64*. This function then sets the appropriate values, like the offset. loop_device has lo_offset of type loff_t (see line 52 of loop.c), which is typdef-chained to long long, whereas loop_info64 has lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h). The function directly copies offset from info to the device as follows (See line 980 of loop.c): lo->lo_offset = info->lo_offset; This results in an overflow, which triggers a warning in iomap_iter() due to a call to iomap_iter_done() which has: WARN_ON_ONCE(iter->iomap.offset > iter->pos); Thus, check for negative value during loop_set_status_from_info(). Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Siddh Raman Pant Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e3c0ba93c1a3..ad92192c7d61 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo, lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; + + /* loff_t vars have been assigned __u64 */ + if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) + return -EOVERFLOW; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; lo->lo_flags = info->lo_flags; -- cgit v1.2.3 From 2cacedc873ab5f5945d8f1b71804b0bcea0383ff Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:38 +0100 Subject: io_uring/net: fix must_hold annotation Fix up the io_alloc_notif()'s __must_hold as we don't have a ctx argument there but should get it from the slot instead. Reported-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cbb0a920f18e0aed590bf58300af817b9befb8a3.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/notif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/notif.c b/io_uring/notif.c index 977736e82c1a..568ff17dc552 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -73,7 +73,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, } void io_notif_slot_flush(struct io_notif_slot *slot) - __must_hold(&ctx->uring_lock) + __must_hold(&slot->notif->ctx->uring_lock) { struct io_kiocb *notif = slot->notif; struct io_notif_data *nd = io_notif_to_data(notif); -- cgit v1.2.3 From 5a848b7c9e5e4d94390fbc391ccb81d40f3ccfb5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:39 +0100 Subject: io_uring/net: fix zc send link failing Failed requests should be marked with req_set_fail(), so links and cqe skipping work correctly, which is missing in io_sendzc(). Note, io_sendzc() return IOU_OK on failure, so the core code won't do the cleanup for us. Fixes: 06a5464be84e4 ("io_uring: wire send zc request type") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e47d46fda9db30154ce66a549bb0d3380b780520.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index f8cdf1dc3863..d6310c655a0f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1023,6 +1023,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) } if (ret == -ERESTARTSYS) ret = -EINTR; + req_set_fail(req); } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { io_notif_slot_flush_submit(notif_slot, 0); } -- cgit v1.2.3 From 986e263def32eec89153babf469859d837507d34 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:40 +0100 Subject: io_uring/net: fix indentation Fix up indentation before we get complaints from tooling. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/bd5754e3764215ccd7fb04cd636ea9167aaa275d.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index d6310c655a0f..3adcb09ae264 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -989,7 +989,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); if (unlikely(ret)) - return ret; + return ret; } else { ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); -- cgit v1.2.3 From 53bdc88aac9a21aae937452724fa4738cd843795 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:41 +0100 Subject: io_uring/notif: order notif vs send CQEs Currently, there is no ordering between notification CQEs and completions of the send flushing it, this quite complicates the userspace, especially since we don't flush notification when the send(+flush) request fails, i.e. there will be only one CQE. What we can do is to make sure that notification completions come only after sends. The easiest way to achieve this is to not try to complete a notification inline from io_sendzc() but defer it to task_work, considering that io-wq sendzc is disallowed CQEs will be naturally ordered because task_works will only be executed after we're done with submission and so inline completion. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cddfd1c2bf91f22b9fe08e13b7dffdd8f858a151.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/notif.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/notif.c b/io_uring/notif.c index 568ff17dc552..96f076b175e0 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -81,8 +81,10 @@ void io_notif_slot_flush(struct io_notif_slot *slot) slot->notif = NULL; /* drop slot's master ref */ - if (refcount_dec_and_test(&nd->uarg.refcnt)) - io_notif_complete(notif); + if (refcount_dec_and_test(&nd->uarg.refcnt)) { + notif->io_task_work.func = __io_notif_complete_tw; + io_req_task_work_add(notif); + } } __cold int io_notif_unregister(struct io_ring_ctx *ctx) -- cgit v1.2.3 From 5916943943d19a854238d50d1fe2047467cbeb3c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:42 +0100 Subject: io_uring: conditional ->async_data allocation There are opcodes that need ->async_data only in some cases and allocation it unconditionally may hurt performance. Add an option to opdef to make move the allocation part from the core io_uring to opcode specific code. Note, we can't just set opdef->async_size to zero because there are other helpers that rely on it, e.g. io_alloc_async_data(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9dc62be9e88dd0ed63c48365340e8922d2498293.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 7 ++++--- io_uring/opdef.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ebfdb2212ec2..77616279000b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1450,9 +1450,10 @@ int io_req_prep_async(struct io_kiocb *req) return 0; if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; - if (io_alloc_async_data(req)) - return -EAGAIN; - + if (!io_op_defs[req->opcode].manual_alloc) { + if (io_alloc_async_data(req)) + return -EAGAIN; + } return def->prep_async(req); } diff --git a/io_uring/opdef.h b/io_uring/opdef.h index ece8ed4f96c4..763c6e54e2ee 100644 --- a/io_uring/opdef.h +++ b/io_uring/opdef.h @@ -25,6 +25,8 @@ struct io_op_def { unsigned ioprio : 1; /* supports iopoll */ unsigned iopoll : 1; + /* opcode specific path will handle ->async_data allocation if needed */ + unsigned manual_alloc : 1; /* size of async data needed, if any */ unsigned short async_size; -- cgit v1.2.3 From 6afd9db630b037c7f0bc939368216512568de607 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:19 +0200 Subject: lib/test_cpumask: drop cpu_possible_mask full test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the number of CPUs that can possibly be brought online is known at boot time, e.g. when HOTPLUG is disabled, nr_cpu_ids may be smaller than NR_CPUS. In that case, cpu_possible_mask would not be completely filled, and cpumask_full(cpu_possible_mask) can return false for valid system configurations. Without this test, cpu_possible_mask contents are still constrained by a check on cpumask_weight(), as well as tests in test_cpumask_first(), test_cpumask_last(), test_cpumask_next(), and test_cpumask_iterators(). Fixes: c41e8866c28c ("lib/test: introduce cpumask KUnit test suite") Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Reported-by: Maíra Canal Signed-off-by: Sander Vanheule Tested-by: Maíra Canal Reviewed-by: David Gow Signed-off-by: Yury Norov --- lib/test_cpumask.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c index a31a1622f1f6..4ebf9f5805f3 100644 --- a/lib/test_cpumask.c +++ b/lib/test_cpumask.c @@ -54,7 +54,6 @@ static cpumask_t mask_all; static void test_cpumask_weight(struct kunit *test) { KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask)); KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); -- cgit v1.2.3 From fbbc94d8486c6d683650269154c9a7d8897f35d7 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:20 +0200 Subject: lib/test_cpumask: fix cpu_possible_mask last test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since cpumask_first() on the cpu_possible_mask must return at most nr_cpu_ids - 1 for a valid result, cpumask_last() cannot return anything larger than this value. As test_cpumask_weight() also verifies that the total weight of cpu_possible_mask must equal nr_cpu_ids, the last bit set in this mask must be at nr_cpu_ids - 1. Fixes: c41e8866c28c ("lib/test: introduce cpumask KUnit test suite") Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Reported-by: Maíra Canal Signed-off-by: Sander Vanheule Tested-by: Maíra Canal Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- lib/test_cpumask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c index 4ebf9f5805f3..4d353614d853 100644 --- a/lib/test_cpumask.c +++ b/lib/test_cpumask.c @@ -73,7 +73,7 @@ static void test_cpumask_first(struct kunit *test) static void test_cpumask_last(struct kunit *test) { KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask)); + KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); } static void test_cpumask_next(struct kunit *test) -- cgit v1.2.3 From d3c0ca4992cc21b0e1d7d8e530faa4ab16f8ec41 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:21 +0200 Subject: lib/test_cpumask: follow KUnit style guidelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpumask test suite doesn't follow the KUnit style guidelines, as laid out in Documentation/dev-tools/kunit/style.rst. The file is renamed to lib/cpumask_kunit.c to clearly distinguish it from other, non-KUnit, tests. Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Suggested-by: Maíra Canal Signed-off-by: Sander Vanheule Reviewed-by: Maíra Canal Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- lib/Kconfig.debug | 7 ++- lib/Makefile | 2 +- lib/cpumask_kunit.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/test_cpumask.c | 137 ---------------------------------------------------- 4 files changed, 143 insertions(+), 140 deletions(-) create mode 100644 lib/cpumask_kunit.c delete mode 100644 lib/test_cpumask.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 072e4b289c13..bcbe60d6c80c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2029,13 +2029,16 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.rst -config TEST_CPUMASK - tristate "cpumask tests" if !KUNIT_ALL_TESTS +config CPUMASK_KUNIT_TEST + tristate "KUnit test for cpumask" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS help Enable to turn on cpumask tests, running at boot or module load time. + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + If unsure, say N. config TEST_LIST_SORT diff --git a/lib/Makefile b/lib/Makefile index 5927d7fa0806..ffabc30a27d4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror +obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o @@ -100,7 +101,6 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o -obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o # diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c new file mode 100644 index 000000000000..4d353614d853 --- /dev/null +++ b/lib/cpumask_kunit.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for cpumask. + * + * Author: Sander Vanheule + */ + +#include +#include +#include + +#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_not(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ + do { \ + int mask_weight = num_##name##_cpus(); \ + int cpu, iter = 0; \ + for_each_##name##_cpu(cpu) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +static cpumask_t mask_empty; +static cpumask_t mask_all; + +static void test_cpumask_weight(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); + KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); + + KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); + KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); + KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); +} + +static void test_cpumask_first(struct kunit *test) +{ + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); + KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); + + KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); +} + +static void test_cpumask_last(struct kunit *test) +{ + KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); + KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); +} + +static void test_cpumask_next(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); + + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); + KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); +} + +static void test_cpumask_iterators(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); + + EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); +} + +static void test_cpumask_iterators_builtin(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); + + /* Ensure the dynamic masks are stable while running the tests */ + cpu_hotplug_disable(); + + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); + + cpu_hotplug_enable(); +} + +static int test_cpumask_init(struct kunit *test) +{ + cpumask_clear(&mask_empty); + cpumask_setall(&mask_all); + + return 0; +} + +static struct kunit_case test_cpumask_cases[] = { + KUNIT_CASE(test_cpumask_weight), + KUNIT_CASE(test_cpumask_first), + KUNIT_CASE(test_cpumask_last), + KUNIT_CASE(test_cpumask_next), + KUNIT_CASE(test_cpumask_iterators), + KUNIT_CASE(test_cpumask_iterators_builtin), + {} +}; + +static struct kunit_suite test_cpumask_suite = { + .name = "cpumask", + .init = test_cpumask_init, + .test_cases = test_cpumask_cases, +}; +kunit_test_suite(test_cpumask_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c deleted file mode 100644 index 4d353614d853..000000000000 --- a/lib/test_cpumask.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * KUnit tests for cpumask. - * - * Author: Sander Vanheule - */ - -#include -#include -#include - -#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu(cpu, m) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu_not(cpu, m) \ - iter++; \ - KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ - do { \ - int mask_weight = num_##name##_cpus(); \ - int cpu, iter = 0; \ - for_each_##name##_cpu(cpu) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -static cpumask_t mask_empty; -static cpumask_t mask_all; - -static void test_cpumask_weight(struct kunit *test) -{ - KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); - - KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); -} - -static void test_cpumask_first(struct kunit *test) -{ - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); - - KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); -} - -static void test_cpumask_last(struct kunit *test) -{ - KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); -} - -static void test_cpumask_next(struct kunit *test) -{ - KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); - - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); -} - -static void test_cpumask_iterators(struct kunit *test) -{ - EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); - EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); - - EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); - EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); -} - -static void test_cpumask_iterators_builtin(struct kunit *test) -{ - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); - - /* Ensure the dynamic masks are stable while running the tests */ - cpu_hotplug_disable(); - - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); - - cpu_hotplug_enable(); -} - -static int test_cpumask_init(struct kunit *test) -{ - cpumask_clear(&mask_empty); - cpumask_setall(&mask_all); - - return 0; -} - -static struct kunit_case test_cpumask_cases[] = { - KUNIT_CASE(test_cpumask_weight), - KUNIT_CASE(test_cpumask_first), - KUNIT_CASE(test_cpumask_last), - KUNIT_CASE(test_cpumask_next), - KUNIT_CASE(test_cpumask_iterators), - KUNIT_CASE(test_cpumask_iterators_builtin), - {} -}; - -static struct kunit_suite test_cpumask_suite = { - .name = "cpumask", - .init = test_cpumask_init, - .test_cases = test_cpumask_cases, -}; -kunit_test_suite(test_cpumask_suite); - -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From bf5413586b8dbdb8eedee41cfd1450a4e587e845 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:22 +0200 Subject: lib/cpumask_kunit: log mask contents For extra context, log the contents of the masks under test. This should help with finding out why a certain test fails. Link: https://lore.kernel.org/lkml/CABVgOSkPXBc-PWk1zBZRQ_Tt+Sz1ruFHBj3ixojymZF=Vi4tpQ@mail.gmail.com/ Suggested-by: David Gow Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- lib/cpumask_kunit.c | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index 4d353614d853..ecbeec72221e 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -9,6 +9,10 @@ #include #include +#define MASK_MSG(m) \ + "%s contains %sCPUs %*pbl", #m, (cpumask_weight(m) ? "" : "no "), \ + nr_cpumask_bits, cpumask_bits(m) + #define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ do { \ const cpumask_t *m = (mask); \ @@ -16,7 +20,7 @@ int cpu, iter = 0; \ for_each_cpu(cpu, m) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ @@ -26,7 +30,7 @@ int cpu, iter = 0; \ for_each_cpu_not(cpu, m) \ iter++; \ - KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ @@ -36,7 +40,7 @@ int cpu, iter = 0; \ for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ @@ -45,7 +49,7 @@ int cpu, iter = 0; \ for_each_##name##_cpu(cpu) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(cpu_##name##_mask)); \ } while (0) static cpumask_t mask_empty; @@ -53,36 +57,43 @@ static cpumask_t mask_all; static void test_cpumask_weight(struct kunit *test) { - KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); + KUNIT_EXPECT_TRUE_MSG(test, cpumask_empty(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_TRUE_MSG(test, cpumask_full(&mask_all), MASK_MSG(&mask_all)); - KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all)); } static void test_cpumask_first(struct kunit *test) { - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first(cpu_possible_mask), MASK_MSG(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first_zero(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_last(struct kunit *test) { - KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); + KUNIT_EXPECT_LE_MSG(test, nr_cpumask_bits, cpumask_last(&mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_next(struct kunit *test) { - KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); - - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next_zero(-1, &mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next(-1, &mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_iterators(struct kunit *test) -- cgit v1.2.3 From 5d7fef0804b0a72a7efe196cd23b438edf84726c Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:23 +0200 Subject: lib/cpumask_kunit: add tests file to MAINTAINERS cpumask related files are listed under the BITMAP API section, so the file with tests for cpumask should be added to that list. Signed-off-by: Sander Vanheule Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9d7f64dc0efe..59b3718e66d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3612,6 +3612,7 @@ F: include/linux/find.h F: include/linux/nodemask.h F: lib/bitmap.c F: lib/cpumask.c +F: lib/cpumask_kunit.c F: lib/find_bit.c F: lib/find_bit_benchmark.c F: lib/test_bitmap.c -- cgit v1.2.3 From a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Aug 2022 17:00:19 -0300 Subject: cifs: skip extra NULL byte in filenames Since commit: cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty alloc_path_with_tree_prefix() function was no longer including the trailing separator when @path is empty, although @out_len was still assuming a path separator thus adding an extra byte to the final filename. This has caused mount issues in some Synology servers due to the extra NULL byte in filenames when sending SMB2_CREATE requests with SMB2_FLAGS_DFS_OPERATIONS set. Fix this by checking if @path is not empty and then add extra byte for separator. Also, do not include any trailing NULL bytes in filename as MS-SMB2 requires it to be 8-byte aligned and not NULL terminated. Cc: stable@vger.kernel.org Fixes: 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 91cfc5b47ac7..128e44e57528 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2572,19 +2572,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, path_len = UniStrnlen((wchar_t *)path, PATH_MAX); - /* - * make room for one path separator between the treename and - * path - */ - *out_len = treename_len + 1 + path_len; + /* make room for one path separator only if @path isn't empty */ + *out_len = treename_len + (path[0] ? 1 : 0) + path_len; /* - * final path needs to be null-terminated UTF16 with a - * size aligned to 8 + * final path needs to be 8-byte aligned as specified in + * MS-SMB2 2.2.13 SMB2 CREATE Request. */ - - *out_size = roundup((*out_len+1)*2, 8); - *out_path = kzalloc(*out_size, GFP_KERNEL); + *out_size = roundup(*out_len * sizeof(__le16), 8); + *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL); if (!*out_path) return -ENOMEM; -- cgit v1.2.3 From 265ad47a40da581be77172b4a8e1fb72b2bd914a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Aug 2022 11:20:12 -0700 Subject: md/raid10: Fix the data type of an r10_sync_page_io() argument Fix the following sparse warning: drivers/md/raid10.c:2647:60: sparse: sparse: incorrect type in argument 5 (different base types) @@ expected restricted blk_opf_t [usertype] opf @@ got int rw @@ This patch does not change any functionality since REQ_OP_READ = READ = 0 and since REQ_OP_WRITE = WRITE = 1. Cc: Rong A Chen Cc: Jens Axboe Cc: Paul Menzel Fixes: 4ce4c73f662b ("md/core: Combine two sync_page_io() arguments") Reported-by: kernel test robot Signed-off-by: Bart Van Assche Signed-off-by: Song Liu --- drivers/md/raid10.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9117fcdee1be..64d6e4cd8a3a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2639,18 +2639,18 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) } static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) + int sectors, struct page *page, enum req_op op) { sector_t first_bad; int bad_sectors; if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) - && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) + && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, op, false)) /* success */ return 1; - if (rw == WRITE) { + if (op == REQ_OP_WRITE) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, @@ -2780,7 +2780,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 if (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s, conf->tmppage, WRITE) + s, conf->tmppage, REQ_OP_WRITE) == 0) { /* Well, this device is dead */ pr_notice("md/raid10:%s: read correction write failed (%d sectors at %llu on %pg)\n", @@ -2814,8 +2814,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 switch (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s, conf->tmppage, - READ)) { + s, conf->tmppage, REQ_OP_READ)) { case 0: /* Well, this device is dead */ pr_notice("md/raid10:%s: unable to read back corrected sectors (%d sectors at %llu on %pg)\n", -- cgit v1.2.3 From 5e8daf906f890560df430d30617c692a794acb73 Mon Sep 17 00:00:00 2001 From: David Sloan Date: Thu, 11 Aug 2022 11:14:13 -0600 Subject: md: Flush workqueue md_rdev_misc_wq in md_alloc() A race condition still exists when removing and re-creating md devices in test cases. However, it is only seen on some setups. The race condition was tracked down to a reference still being held to the kobject by the rdev in the md_rdev_misc_wq which will be released in rdev_delayed_delete(). md_alloc() waits for previous deletions by waiting on the md_misc_wq, but the md_rdev_misc_wq may still be holding a reference to a recently removed device. To fix this, also flush the md_rdev_misc_wq in md_alloc(). Signed-off-by: David Sloan [logang@deltatee.com: rewrote commit message] Signed-off-by: Logan Gunthorpe Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index afaf36b2f6ab..71d221601bf8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5620,6 +5620,7 @@ struct mddev *md_alloc(dev_t dev, char *name) * removed (mddev_delayed_delete). */ flush_workqueue(md_misc_wq); + flush_workqueue(md_rdev_misc_wq); mutex_lock(&disks_mutex); mddev = mddev_alloc(dev); -- cgit v1.2.3 From 1d258758cf06a0734482989911d184dd5837ed4e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 17 Aug 2022 20:05:13 +0800 Subject: Revert "md-raid: destroy the bitmap after destroying the thread" This reverts commit e151db8ecfb019b7da31d076130a794574c89f6f. Because it obviously breaks clustered raid as noticed by Neil though it fixed KASAN issue for dm-raid, let's revert it and fix KASAN issue in next commit. [1]. https://lore.kernel.org/linux-raid/a6657e08-b6a7-358b-2d2a-0ac37d49d23a@linux.dev/T/#m95ac225cab7409f66c295772483d091084a6d470 Fixes: e151db8ecfb0 ("md-raid: destroy the bitmap after destroying the thread") Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 71d221601bf8..107c4c953c35 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6239,11 +6239,11 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; + md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ if (mddev->event_work.func) flush_workqueue(md_misc_wq); - md_bitmap_destroy(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); -- cgit v1.2.3 From 0dd84b319352bb8ba64752d4e45396d8b13e6018 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 17 Aug 2022 20:05:14 +0800 Subject: md: call __md_stop_writes in md_stop From the link [1], we can see raid1d was running even after the path raid_dtr -> md_stop -> __md_stop. Let's stop write first in destructor to align with normal md-raid to fix the KASAN issue. [1]. https://lore.kernel.org/linux-raid/CAPhsuW5gc4AakdGNdF8ubpezAuDLFOYUO_sfMZcec6hQFm8nhg@mail.gmail.com/T/#m7f12bf90481c02c6d2da68c64aeed4779b7df74a Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop") Reported-by: Mikulas Patocka Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 107c4c953c35..729be2c5296c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6261,6 +6261,7 @@ void md_stop(struct mddev *mddev) /* stop the array and free an attached data structures. * This is called from dm-raid */ + __md_stop_writes(mddev); __md_stop(mddev); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); -- cgit v1.2.3 From acf4c6205e862304681234a6a4375b478af12552 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Fri, 12 Aug 2022 14:52:23 +0800 Subject: fbdev: omapfb: Fix tests for platform_get_irq() failure The platform_get_irq() returns negative error codes. It can't actually return zero. Signed-off-by: Yu Zhe Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index dfb4ddc45701..fbb3af883d4d 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1642,14 +1642,14 @@ static int omapfb_do_probe(struct platform_device *pdev, goto cleanup; } fbdev->int_irq = platform_get_irq(pdev, 0); - if (!fbdev->int_irq) { + if (fbdev->int_irq < 0) { dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); - if (!fbdev->ext_irq) { + if (fbdev->ext_irq < 0) { dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; -- cgit v1.2.3 From 868ce967af1ea5d946635f8f89f752319212d769 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Tue, 16 Aug 2022 21:07:13 +0800 Subject: fbdev: ssd1307fb: Fix repeated words in comments Delete the redundant word 'set'. Signed-off-by: Jilin Yuan Signed-off-by: Helge Deller --- drivers/video/fbdev/ssd1307fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index 5c765655d000..52e4ed9da78c 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -450,7 +450,7 @@ static int ssd1307fb_init(struct ssd1307fb_par *par) if (ret < 0) return ret; - /* Set Set Area Color Mode ON/OFF & Low Power Display Mode */ + /* Set Area Color Mode ON/OFF & Low Power Display Mode */ if (par->area_color_enable || par->low_power) { u32 mode; -- cgit v1.2.3 From 19f953e7435644b81332dd632ba1b2d80b1e37af Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Thu, 18 Aug 2022 18:44:24 +0800 Subject: fbdev: fb_pm2fb: Avoid potential divide by zero error In `do_fb_ioctl()` of fbmem.c, if cmd is FBIOPUT_VSCREENINFO, var will be copied from user, then go through `fb_set_var()` and `info->fbops->fb_check_var()` which could may be `pm2fb_check_var()`. Along the path, `var->pixclock` won't be modified. This function checks whether reciprocal of `var->pixclock` is too high. If `var->pixclock` is zero, there will be a divide by zero error. So, it is necessary to check whether denominator is zero to avoid crash. As this bug is found by Syzkaller, logs are listed below. divide error in pm2fb_check_var Call Trace: fb_set_var+0x367/0xeb0 drivers/video/fbdev/core/fbmem.c:1015 do_fb_ioctl+0x234/0x670 drivers/video/fbdev/core/fbmem.c:1110 fb_ioctl+0xdd/0x130 drivers/video/fbdev/core/fbmem.c:1189 Reported-by: Zheyu Ma Signed-off-by: Letu Ren Signed-off-by: Helge Deller --- drivers/video/fbdev/pm2fb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c index d3be2c64f1c0..8fd79deb1e2a 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -617,6 +617,11 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) return -EINVAL; } + if (!var->pixclock) { + DPRINTK("pixclock is zero\n"); + return -EINVAL; + } + if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) { DPRINTK("pixclock too high (%ldKHz)\n", PICOS2KHZ(var->pixclock)); -- cgit v1.2.3 From 3119cabcc5237c63fc5316409c5beada6304ca75 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 19 Aug 2022 19:04:14 +0800 Subject: fbdev: sisfb: Clean up some inconsistent indenting No functional modification involved. drivers/video/fbdev/sis/sis_main.c:6165 sisfb_probe() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:4266 sisfb_post_300_rwtest() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2388 SISDoSense() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2531 SiS_Sense30x() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2382 SISDoSense() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2250 sisfb_sense_crt1() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:672 sisfb_validate_mode() warn: inconsistent indenting. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1934 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/sis/sis_main.c | 274 +++++++++++++++++++------------------ 1 file changed, 141 insertions(+), 133 deletions(-) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index f28fd69d5eb7..d29ded67cecb 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -649,37 +649,37 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) u16 xres=0, yres, myres; #ifdef CONFIG_FB_SIS_300 - if(ivideo->sisvga_engine == SIS_300_VGA) { - if(!(sisbios_mode[myindex].chipset & MD_SIS300)) + if (ivideo->sisvga_engine == SIS_300_VGA) { + if (!(sisbios_mode[myindex].chipset & MD_SIS300)) return -1 ; } #endif #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - if(!(sisbios_mode[myindex].chipset & MD_SIS315)) + if (ivideo->sisvga_engine == SIS_315_VGA) { + if (!(sisbios_mode[myindex].chipset & MD_SIS315)) return -1; } #endif myres = sisbios_mode[myindex].yres; - switch(vbflags & VB_DISPTYPE_DISP2) { + switch (vbflags & VB_DISPTYPE_DISP2) { case CRT2_LCD: xres = ivideo->lcdxres; yres = ivideo->lcdyres; - if((ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL848) && - (ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL856)) { - if(sisbios_mode[myindex].xres > xres) + if ((ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL848) && + (ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL856)) { + if (sisbios_mode[myindex].xres > xres) return -1; - if(myres > yres) + if (myres > yres) return -1; } - if(ivideo->sisfb_fstn) { - if(sisbios_mode[myindex].xres == 320) { - if(myres == 240) { - switch(sisbios_mode[myindex].mode_no[1]) { + if (ivideo->sisfb_fstn) { + if (sisbios_mode[myindex].xres == 320) { + if (myres == 240) { + switch (sisbios_mode[myindex].mode_no[1]) { case 0x50: myindex = MODE_FSTN_8; break; case 0x56: myindex = MODE_FSTN_16; break; case 0x53: return -1; @@ -688,7 +688,7 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) } } - if(SiS_GetModeID_LCD(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_LCD(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->sisfb_fstn, ivideo->SiS_Pr.SiS_CustomT, xres, yres, ivideo->vbflags2) < 0x14) { return -1; @@ -696,14 +696,14 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) break; case CRT2_TV: - if(SiS_GetModeID_TV(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_TV(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->vbflags2) < 0x14) { return -1; } break; case CRT2_VGA: - if(SiS_GetModeID_VGA2(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_VGA2(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->vbflags2) < 0x14) { return -1; } @@ -2204,82 +2204,88 @@ static bool sisfb_test_DDC1(struct sis_video_info *ivideo) static void sisfb_sense_crt1(struct sis_video_info *ivideo) { - bool mustwait = false; - u8 sr1F, cr17; + bool mustwait = false; + u8 sr1F, cr17; #ifdef CONFIG_FB_SIS_315 - u8 cr63=0; + u8 cr63 = 0; #endif - u16 temp = 0xffff; - int i; + u16 temp = 0xffff; + int i; + + sr1F = SiS_GetReg(SISSR, 0x1F); + SiS_SetRegOR(SISSR, 0x1F, 0x04); + SiS_SetRegAND(SISSR, 0x1F, 0x3F); - sr1F = SiS_GetReg(SISSR, 0x1F); - SiS_SetRegOR(SISSR, 0x1F, 0x04); - SiS_SetRegAND(SISSR, 0x1F, 0x3F); - if(sr1F & 0xc0) mustwait = true; + if (sr1F & 0xc0) + mustwait = true; #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - cr63 = SiS_GetReg(SISCR, ivideo->SiS_Pr.SiS_MyCR63); - cr63 &= 0x40; - SiS_SetRegAND(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF); - } + if (ivideo->sisvga_engine == SIS_315_VGA) { + cr63 = SiS_GetReg(SISCR, ivideo->SiS_Pr.SiS_MyCR63); + cr63 &= 0x40; + SiS_SetRegAND(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF); + } #endif - cr17 = SiS_GetReg(SISCR, 0x17); - cr17 &= 0x80; - if(!cr17) { - SiS_SetRegOR(SISCR, 0x17, 0x80); - mustwait = true; - SiS_SetReg(SISSR, 0x00, 0x01); - SiS_SetReg(SISSR, 0x00, 0x03); - } + cr17 = SiS_GetReg(SISCR, 0x17); + cr17 &= 0x80; - if(mustwait) { - for(i=0; i < 10; i++) sisfbwaitretracecrt1(ivideo); - } + if (!cr17) { + SiS_SetRegOR(SISCR, 0x17, 0x80); + mustwait = true; + SiS_SetReg(SISSR, 0x00, 0x01); + SiS_SetReg(SISSR, 0x00, 0x03); + } + if (mustwait) { + for (i = 0; i < 10; i++) + sisfbwaitretracecrt1(ivideo); + } #ifdef CONFIG_FB_SIS_315 - if(ivideo->chip >= SIS_330) { - SiS_SetRegAND(SISCR, 0x32, ~0x20); - if(ivideo->chip >= SIS_340) { - SiS_SetReg(SISCR, 0x57, 0x4a); - } else { - SiS_SetReg(SISCR, 0x57, 0x5f); - } - SiS_SetRegOR(SISCR, 0x53, 0x02); - while ((SiS_GetRegByte(SISINPSTAT)) & 0x01) break; - while (!((SiS_GetRegByte(SISINPSTAT)) & 0x01)) break; - if ((SiS_GetRegByte(SISMISCW)) & 0x10) temp = 1; - SiS_SetRegAND(SISCR, 0x53, 0xfd); - SiS_SetRegAND(SISCR, 0x57, 0x00); - } + if (ivideo->chip >= SIS_330) { + SiS_SetRegAND(SISCR, 0x32, ~0x20); + if (ivideo->chip >= SIS_340) + SiS_SetReg(SISCR, 0x57, 0x4a); + else + SiS_SetReg(SISCR, 0x57, 0x5f); + + SiS_SetRegOR(SISCR, 0x53, 0x02); + while ((SiS_GetRegByte(SISINPSTAT)) & 0x01) + break; + while (!((SiS_GetRegByte(SISINPSTAT)) & 0x01)) + break; + if ((SiS_GetRegByte(SISMISCW)) & 0x10) + temp = 1; + + SiS_SetRegAND(SISCR, 0x53, 0xfd); + SiS_SetRegAND(SISCR, 0x57, 0x00); + } #endif - if(temp == 0xffff) { - i = 3; - do { - temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, - ivideo->sisvga_engine, 0, 0, NULL, ivideo->vbflags2); - } while(((temp == 0) || (temp == 0xffff)) && i--); + if (temp == 0xffff) { + i = 3; - if((temp == 0) || (temp == 0xffff)) { - if(sisfb_test_DDC1(ivideo)) temp = 1; - } - } + do { + temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, + ivideo->sisvga_engine, 0, 0, NULL, ivideo->vbflags2); + } while (((temp == 0) || (temp == 0xffff)) && i--); - if((temp) && (temp != 0xffff)) { - SiS_SetRegOR(SISCR, 0x32, 0x20); - } + if ((temp == 0) || (temp == 0xffff)) { + if (sisfb_test_DDC1(ivideo)) + temp = 1; + } + } + + if ((temp) && (temp != 0xffff)) + SiS_SetRegOR(SISCR, 0x32, 0x20); #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - SiS_SetRegANDOR(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF, cr63); - } + if (ivideo->sisvga_engine == SIS_315_VGA) + SiS_SetRegANDOR(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF, cr63); #endif - SiS_SetRegANDOR(SISCR, 0x17, 0x7F, cr17); - - SiS_SetReg(SISSR, 0x1F, sr1F); + SiS_SetRegANDOR(SISCR, 0x17, 0x7F, cr17); + SiS_SetReg(SISSR, 0x1F, sr1F); } /* Determine and detect attached devices on SiS30x */ @@ -2293,25 +2299,25 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) ivideo->SiS_Pr.PanelSelfDetected = false; /* LCD detection only for TMDS bridges */ - if(!(ivideo->vbflags2 & VB2_SISTMDSBRIDGE)) + if (!(ivideo->vbflags2 & VB2_SISTMDSBRIDGE)) return; - if(ivideo->vbflags2 & VB2_30xBDH) + if (ivideo->vbflags2 & VB2_30xBDH) return; /* If LCD already set up by BIOS, skip it */ reg = SiS_GetReg(SISCR, 0x32); - if(reg & 0x08) + if (reg & 0x08) return; realcrtno = 1; - if(ivideo->SiS_Pr.DDCPortMixup) + if (ivideo->SiS_Pr.DDCPortMixup) realcrtno = 0; /* Check DDC capabilities */ temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, ivideo->sisvga_engine, realcrtno, 0, &buffer[0], ivideo->vbflags2); - if((!temp) || (temp == 0xffff) || (!(temp & 0x02))) + if ((!temp) || (temp == 0xffff) || (!(temp & 0x02))) return; /* Read DDC data */ @@ -2320,17 +2326,17 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, ivideo->sisvga_engine, realcrtno, 1, &buffer[0], ivideo->vbflags2); - } while((temp) && i--); + } while ((temp) && i--); - if(temp) + if (temp) return; /* No digital device */ - if(!(buffer[0x14] & 0x80)) + if (!(buffer[0x14] & 0x80)) return; /* First detailed timing preferred timing? */ - if(!(buffer[0x18] & 0x02)) + if (!(buffer[0x18] & 0x02)) return; xres = buffer[0x38] | ((buffer[0x3a] & 0xf0) << 4); @@ -2338,26 +2344,26 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) switch(xres) { case 1024: - if(yres == 768) + if (yres == 768) paneltype = 0x02; break; case 1280: - if(yres == 1024) + if (yres == 1024) paneltype = 0x03; break; case 1600: - if((yres == 1200) && (ivideo->vbflags2 & VB2_30xC)) + if ((yres == 1200) && (ivideo->vbflags2 & VB2_30xC)) paneltype = 0x0b; break; } - if(!paneltype) + if (!paneltype) return; - if(buffer[0x23]) + if (buffer[0x23]) cr37 |= 0x10; - if((buffer[0x47] & 0x18) == 0x18) + if ((buffer[0x47] & 0x18) == 0x18) cr37 |= ((((buffer[0x47] & 0x06) ^ 0x06) << 5) | 0x20); else cr37 |= 0xc0; @@ -2372,31 +2378,34 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) static int SISDoSense(struct sis_video_info *ivideo, u16 type, u16 test) { - int temp, mytest, result, i, j; - - for(j = 0; j < 10; j++) { - result = 0; - for(i = 0; i < 3; i++) { - mytest = test; - SiS_SetReg(SISPART4, 0x11, (type & 0x00ff)); - temp = (type >> 8) | (mytest & 0x00ff); - SiS_SetRegANDOR(SISPART4, 0x10, 0xe0, temp); - SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1500); - mytest >>= 8; - mytest &= 0x7f; - temp = SiS_GetReg(SISPART4, 0x03); - temp ^= 0x0e; - temp &= mytest; - if(temp == mytest) result++; + int temp, mytest, result, i, j; + + for (j = 0; j < 10; j++) { + result = 0; + for (i = 0; i < 3; i++) { + mytest = test; + SiS_SetReg(SISPART4, 0x11, (type & 0x00ff)); + temp = (type >> 8) | (mytest & 0x00ff); + SiS_SetRegANDOR(SISPART4, 0x10, 0xe0, temp); + SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1500); + mytest >>= 8; + mytest &= 0x7f; + temp = SiS_GetReg(SISPART4, 0x03); + temp ^= 0x0e; + temp &= mytest; + if (temp == mytest) + result++; #if 1 - SiS_SetReg(SISPART4, 0x11, 0x00); - SiS_SetRegAND(SISPART4, 0x10, 0xe0); - SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1000); + SiS_SetReg(SISPART4, 0x11, 0x00); + SiS_SetRegAND(SISPART4, 0x10, 0xe0); + SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1000); #endif - } - if((result == 0) || (result >= 2)) break; - } - return result; + } + + if ((result == 0) || (result >= 2)) + break; + } + return result; } static void SiS_Sense30x(struct sis_video_info *ivideo) @@ -4262,18 +4271,17 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, unsigned int k, RankCapacity, PageCapacity, BankNumHigh, BankNumMid; unsigned int PhysicalAdrOtherPage, PhysicalAdrHigh, PhysicalAdrHalfPage; - for(k = 0; k < ARRAY_SIZE(SiS_DRAMType); k++) { - + for (k = 0; k < ARRAY_SIZE(SiS_DRAMType); k++) { RankCapacity = buswidth * SiS_DRAMType[k][3]; - if(RankCapacity != PseudoRankCapacity) + if (RankCapacity != PseudoRankCapacity) continue; - if((SiS_DRAMType[k][2] + SiS_DRAMType[k][0]) > PseudoAdrPinCount) + if ((SiS_DRAMType[k][2] + SiS_DRAMType[k][0]) > PseudoAdrPinCount) continue; BankNumHigh = RankCapacity * 16 * iteration - 1; - if(iteration == 3) { /* Rank No */ + if (iteration == 3) { /* Rank No */ BankNumMid = RankCapacity * 16 - 1; } else { BankNumMid = RankCapacity * 16 * iteration / 2 - 1; @@ -4287,18 +4295,22 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, SiS_SetRegAND(SISSR, 0x15, 0xFB); /* Test */ SiS_SetRegOR(SISSR, 0x15, 0x04); /* Test */ sr14 = (SiS_DRAMType[k][3] * buswidth) - 1; - if(buswidth == 4) sr14 |= 0x80; - else if(buswidth == 2) sr14 |= 0x40; + + if (buswidth == 4) + sr14 |= 0x80; + else if (buswidth == 2) + sr14 |= 0x40; + SiS_SetReg(SISSR, 0x13, SiS_DRAMType[k][4]); SiS_SetReg(SISSR, 0x14, sr14); BankNumHigh <<= 16; BankNumMid <<= 16; - if((BankNumHigh + PhysicalAdrHigh >= mapsize) || - (BankNumMid + PhysicalAdrHigh >= mapsize) || - (BankNumHigh + PhysicalAdrHalfPage >= mapsize) || - (BankNumHigh + PhysicalAdrOtherPage >= mapsize)) + if ((BankNumHigh + PhysicalAdrHigh >= mapsize) || + (BankNumMid + PhysicalAdrHigh >= mapsize) || + (BankNumHigh + PhysicalAdrHalfPage >= mapsize) || + (BankNumHigh + PhysicalAdrOtherPage >= mapsize)) continue; /* Write data */ @@ -4312,7 +4324,7 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, (FBAddr + BankNumHigh + PhysicalAdrOtherPage)); /* Read data */ - if(readw(FBAddr + BankNumHigh + PhysicalAdrHigh) == PhysicalAdrHigh) + if (readw(FBAddr + BankNumHigh + PhysicalAdrHigh) == PhysicalAdrHigh) return 1; } @@ -6150,24 +6162,20 @@ static int sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { + if (ivideo->sisvga_engine == SIS_315_VGA) { int result = 1; - /* if((ivideo->chip == SIS_315H) || - (ivideo->chip == SIS_315) || - (ivideo->chip == SIS_315PRO) || - (ivideo->chip == SIS_330)) { - sisfb_post_sis315330(pdev); - } else */ if(ivideo->chip == XGI_20) { + + if (ivideo->chip == XGI_20) { result = sisfb_post_xgi(pdev); ivideo->sisfb_can_post = 1; - } else if((ivideo->chip == XGI_40) && ivideo->haveXGIROM) { + } else if ((ivideo->chip == XGI_40) && ivideo->haveXGIROM) { result = sisfb_post_xgi(pdev); ivideo->sisfb_can_post = 1; } else { printk(KERN_INFO "sisfb: Card is not " "POSTed and sisfb can't do this either.\n"); } - if(!result) { + if (!result) { printk(KERN_ERR "sisfb: Failed to POST card\n"); ret = -ENODEV; goto error_3; -- cgit v1.2.3 From 1cd3bf3348b4a686cc955c9cd11034d7652219cd Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 19 Aug 2022 19:06:59 +0800 Subject: fbdev: radeon: Clean up some inconsistent indenting No functional modification involved. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1932 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/radeon_base.c | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 6851f47613e1..15933c0ea763 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -2094,34 +2094,34 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo) u32 tmp; /* framebuffer size */ - if ((rinfo->family == CHIP_FAMILY_RS100) || + if ((rinfo->family == CHIP_FAMILY_RS100) || (rinfo->family == CHIP_FAMILY_RS200) || (rinfo->family == CHIP_FAMILY_RS300) || (rinfo->family == CHIP_FAMILY_RC410) || (rinfo->family == CHIP_FAMILY_RS400) || (rinfo->family == CHIP_FAMILY_RS480) ) { - u32 tom = INREG(NB_TOM); - tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024); - - radeon_fifo_wait(6); - OUTREG(MC_FB_LOCATION, tom); - OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); - OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); - OUTREG(OV0_BASE_ADDR, (tom & 0xffff) << 16); - - /* This is supposed to fix the crtc2 noise problem. */ - OUTREG(GRPH2_BUFFER_CNTL, INREG(GRPH2_BUFFER_CNTL) & ~0x7f0000); - - if ((rinfo->family == CHIP_FAMILY_RS100) || - (rinfo->family == CHIP_FAMILY_RS200)) { - /* This is to workaround the asic bug for RMX, some versions - of BIOS doesn't have this register initialized correctly. - */ - OUTREGP(CRTC_MORE_CNTL, CRTC_H_CUTOFF_ACTIVE_EN, - ~CRTC_H_CUTOFF_ACTIVE_EN); - } - } else { - tmp = INREG(CNFG_MEMSIZE); + u32 tom = INREG(NB_TOM); + + tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024); + radeon_fifo_wait(6); + OUTREG(MC_FB_LOCATION, tom); + OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); + OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); + OUTREG(OV0_BASE_ADDR, (tom & 0xffff) << 16); + + /* This is supposed to fix the crtc2 noise problem. */ + OUTREG(GRPH2_BUFFER_CNTL, INREG(GRPH2_BUFFER_CNTL) & ~0x7f0000); + + if ((rinfo->family == CHIP_FAMILY_RS100) || + (rinfo->family == CHIP_FAMILY_RS200)) { + /* This is to workaround the asic bug for RMX, some versions + * of BIOS doesn't have this register initialized correctly. + */ + OUTREGP(CRTC_MORE_CNTL, CRTC_H_CUTOFF_ACTIVE_EN, + ~CRTC_H_CUTOFF_ACTIVE_EN); + } + } else { + tmp = INREG(CNFG_MEMSIZE); } /* mem size is bits [28:0], mask off the rest */ -- cgit v1.2.3 From 58559dfc1ebba2ae0c7627dc8f8991ae1984c6e3 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 21 Aug 2022 20:17:31 +0900 Subject: fbdev: fbcon: Destroy mutex on freeing struct fb_info It's needed to destroy bl_curve_mutex on freeing struct fb_info since the mutex is embedded in the structure and initialized when it's allocated. Signed-off-by: Shigeru Yoshida Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbsysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index c2a60b187467..4d7f63892dcc 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -84,6 +84,10 @@ void framebuffer_release(struct fb_info *info) if (WARN_ON(refcount_read(&info->count))) return; +#if IS_ENABLED(CONFIG_FB_BACKLIGHT) + mutex_destroy(&info->bl_curve_mutex); +#endif + kfree(info->apertures); kfree(info); } -- cgit v1.2.3 From 07c55c9803dea748d17a054000cbf1913ce06399 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 19 Aug 2022 16:57:52 +0800 Subject: fbdev: chipsfb: Add missing pci_disable_device() in chipsfb_pci_init() Add missing pci_disable_device() in error path in chipsfb_pci_init(). Signed-off-by: Yang Yingliang Signed-off-by: Helge Deller --- drivers/video/fbdev/chipsfb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index 393894af26f8..2b00a9d554fc 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -430,6 +430,7 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) err_release_fb: framebuffer_release(p); err_disable: + pci_disable_device(dp); err_out: return rc; } -- cgit v1.2.3 From 144c467398aa9fb274583590c848e6d6388e89d9 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 24 Aug 2022 19:44:55 +0800 Subject: fbdev: omap: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1957 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index fbb3af883d4d..17cda5765683 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1643,14 +1643,12 @@ static int omapfb_do_probe(struct platform_device *pdev, } fbdev->int_irq = platform_get_irq(pdev, 0); if (fbdev->int_irq < 0) { - dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); if (fbdev->ext_irq < 0) { - dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } -- cgit v1.2.3 From 8d0268585b9c07dd01b77b63913a4f31ad99239b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:17 +0200 Subject: fbdev: Move fbdev drivers from strlcpy to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Helge Deller --- drivers/video/console/sticore.c | 2 +- drivers/video/fbdev/aty/atyfb_base.c | 2 +- drivers/video/fbdev/aty/radeon_base.c | 2 +- drivers/video/fbdev/bw2.c | 2 +- drivers/video/fbdev/cirrusfb.c | 2 +- drivers/video/fbdev/clps711x-fb.c | 2 +- drivers/video/fbdev/core/fbcon.c | 2 +- drivers/video/fbdev/cyber2000fb.c | 8 ++++---- drivers/video/fbdev/ffb.c | 2 +- drivers/video/fbdev/geode/gx1fb_core.c | 6 +++--- drivers/video/fbdev/gxt4500.c | 2 +- drivers/video/fbdev/i740fb.c | 2 +- drivers/video/fbdev/imxfb.c | 2 +- drivers/video/fbdev/matrox/matroxfb_base.c | 6 +++--- drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 2 +- drivers/video/fbdev/pxa168fb.c | 2 +- drivers/video/fbdev/pxafb.c | 2 +- drivers/video/fbdev/s3fb.c | 2 +- drivers/video/fbdev/simplefb.c | 2 +- drivers/video/fbdev/sis/sis_main.c | 4 ++-- drivers/video/fbdev/sm501fb.c | 2 +- drivers/video/fbdev/sstfb.c | 2 +- drivers/video/fbdev/sunxvr1000.c | 2 +- drivers/video/fbdev/sunxvr2500.c | 2 +- drivers/video/fbdev/sunxvr500.c | 2 +- drivers/video/fbdev/tcx.c | 2 +- drivers/video/fbdev/tdfxfb.c | 4 ++-- drivers/video/fbdev/tgafb.c | 2 +- drivers/video/fbdev/tridentfb.c | 2 +- 29 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index bd4dc97d4d34..db568f67e4dc 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -290,7 +290,7 @@ static char default_sti_path[21] __read_mostly; static int __init sti_setup(char *str) { if (str) - strlcpy (default_sti_path, str, sizeof (default_sti_path)); + strscpy(default_sti_path, str, sizeof(default_sti_path)); return 1; } diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index a3e6faed7745..14eb718bd67c 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3891,7 +3891,7 @@ static int __init atyfb_setup(char *options) && (!strncmp(this_opt, "Mach64:", 7))) { static unsigned char m64_num; static char mach64_str[80]; - strlcpy(mach64_str, this_opt + 7, sizeof(mach64_str)); + strscpy(mach64_str, this_opt + 7, sizeof(mach64_str)); if (!store_video_par(mach64_str, m64_num)) { m64_num++; mach64_count = m64_num; diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 15933c0ea763..a14a8d73035c 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -1980,7 +1980,7 @@ static int radeon_set_fbinfo(struct radeonfb_info *rinfo) info->screen_base = rinfo->fb_base; info->screen_size = rinfo->mapped_vram; /* Fill fix common fields */ - strlcpy(info->fix.id, rinfo->name, sizeof(info->fix.id)); + strscpy(info->fix.id, rinfo->name, sizeof(info->fix.id)); info->fix.smem_start = rinfo->fb_base_phys; info->fix.smem_len = rinfo->video_ram; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c index e7702fe1fe7d..6403ae07970d 100644 --- a/drivers/video/fbdev/bw2.c +++ b/drivers/video/fbdev/bw2.c @@ -182,7 +182,7 @@ static int bw2_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) static void bw2_init_fix(struct fb_info *info, int linebytes) { - strlcpy(info->fix.id, "bwtwo", sizeof(info->fix.id)); + strscpy(info->fix.id, "bwtwo", sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_MONO01; diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index a41a75841e10..2a9fa06881b5 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -1999,7 +1999,7 @@ static int cirrusfb_set_fbinfo(struct fb_info *info) } /* Fill fix common fields */ - strlcpy(info->fix.id, cirrusfb_board_info[cinfo->btype].name, + strscpy(info->fix.id, cirrusfb_board_info[cinfo->btype].name, sizeof(info->fix.id)); /* monochrome: only 1 memory plane */ diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index 771ce1f76951..a1061c2f1640 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -326,7 +326,7 @@ static int clps711x_fb_probe(struct platform_device *pdev) info->var.vmode = FB_VMODE_NONINTERLACED; info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.accel = FB_ACCEL_NONE; - strlcpy(info->fix.id, CLPS711X_FB_NAME, sizeof(info->fix.id)); + strscpy(info->fix.id, CLPS711X_FB_NAME, sizeof(info->fix.id)); fb_videomode_to_var(&info->var, &cfb->mode); ret = fb_alloc_cmap(&info->cmap, BIT(CLPS711X_FB_BPP_MAX), 0); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index cf9ac4da0a82..4a032fcf0d14 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -412,7 +412,7 @@ static int __init fb_console_setup(char *this_opt) while ((options = strsep(&this_opt, ",")) != NULL) { if (!strncmp(options, "font:", 5)) { - strlcpy(fontname, options + 5, sizeof(fontname)); + strscpy(fontname, options + 5, sizeof(fontname)); continue; } diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c index d45355b9a58c..8f041f9b14c7 100644 --- a/drivers/video/fbdev/cyber2000fb.c +++ b/drivers/video/fbdev/cyber2000fb.c @@ -1134,7 +1134,7 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx) info->fb_size = int_cfb_info->fb.fix.smem_len; info->info = int_cfb_info; - strlcpy(info->dev_name, int_cfb_info->fb.fix.id, + strscpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name)); } @@ -1229,7 +1229,7 @@ static int cyber2000fb_ddc_getsda(void *data) static int cyber2000fb_setup_ddc_bus(struct cfb_info *cfb) { - strlcpy(cfb->ddc_adapter.name, cfb->fb.fix.id, + strscpy(cfb->ddc_adapter.name, cfb->fb.fix.id, sizeof(cfb->ddc_adapter.name)); cfb->ddc_adapter.owner = THIS_MODULE; cfb->ddc_adapter.class = I2C_CLASS_DDC; @@ -1304,7 +1304,7 @@ static int cyber2000fb_i2c_getscl(void *data) static int cyber2000fb_i2c_register(struct cfb_info *cfb) { - strlcpy(cfb->i2c_adapter.name, cfb->fb.fix.id, + strscpy(cfb->i2c_adapter.name, cfb->fb.fix.id, sizeof(cfb->i2c_adapter.name)); cfb->i2c_adapter.owner = THIS_MODULE; cfb->i2c_adapter.algo_data = &cfb->i2c_algo; @@ -1500,7 +1500,7 @@ static int cyber2000fb_setup(char *options) if (strncmp(opt, "font:", 5) == 0) { static char default_font_storage[40]; - strlcpy(default_font_storage, opt + 5, + strscpy(default_font_storage, opt + 5, sizeof(default_font_storage)); default_font = default_font_storage; continue; diff --git a/drivers/video/fbdev/ffb.c b/drivers/video/fbdev/ffb.c index b3d580e57221..7cba3969a970 100644 --- a/drivers/video/fbdev/ffb.c +++ b/drivers/video/fbdev/ffb.c @@ -883,7 +883,7 @@ static void ffb_init_fix(struct fb_info *info) } else ffb_type_name = "Elite 3D"; - strlcpy(info->fix.id, ffb_type_name, sizeof(info->fix.id)); + strscpy(info->fix.id, ffb_type_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_TRUECOLOR; diff --git a/drivers/video/fbdev/geode/gx1fb_core.c b/drivers/video/fbdev/geode/gx1fb_core.c index 5d34d89fb665..e41204ecb0e3 100644 --- a/drivers/video/fbdev/geode/gx1fb_core.c +++ b/drivers/video/fbdev/geode/gx1fb_core.c @@ -410,13 +410,13 @@ static void __init gx1fb_setup(char *options) continue; if (!strncmp(this_opt, "mode:", 5)) - strlcpy(mode_option, this_opt + 5, sizeof(mode_option)); + strscpy(mode_option, this_opt + 5, sizeof(mode_option)); else if (!strncmp(this_opt, "crt:", 4)) crt_option = !!simple_strtoul(this_opt + 4, NULL, 0); else if (!strncmp(this_opt, "panel:", 6)) - strlcpy(panel_option, this_opt + 6, sizeof(panel_option)); + strscpy(panel_option, this_opt + 6, sizeof(panel_option)); else - strlcpy(mode_option, this_opt, sizeof(mode_option)); + strscpy(mode_option, this_opt, sizeof(mode_option)); } } #endif diff --git a/drivers/video/fbdev/gxt4500.c b/drivers/video/fbdev/gxt4500.c index e5475ae1e158..94588b809ebf 100644 --- a/drivers/video/fbdev/gxt4500.c +++ b/drivers/video/fbdev/gxt4500.c @@ -650,7 +650,7 @@ static int gxt4500_probe(struct pci_dev *pdev, const struct pci_device_id *ent) cardtype = ent->driver_data; par->refclk_ps = cardinfo[cardtype].refclk_ps; info->fix = gxt4500_fix; - strlcpy(info->fix.id, cardinfo[cardtype].cardname, + strscpy(info->fix.id, cardinfo[cardtype].cardname, sizeof(info->fix.id)); info->pseudo_palette = par->pseudo_palette; diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index 7f09a0daaaa2..bd30d8314b68 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -159,7 +159,7 @@ static int i740fb_setup_ddc_bus(struct fb_info *info) { struct i740fb_par *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index d97d7456d15a..94f3bc637fc8 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -681,7 +681,7 @@ static int imxfb_init_fbinfo(struct platform_device *pdev) fbi->devtype = pdev->id_entry->driver_data; - strlcpy(info->fix.id, IMX_NAME, sizeof(info->fix.id)); + strscpy(info->fix.id, IMX_NAME, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 236521b19daf..68bba2688f4c 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -2383,9 +2383,9 @@ static int __init matroxfb_setup(char *options) { else if (!strncmp(this_opt, "mem:", 4)) mem = simple_strtoul(this_opt+4, NULL, 0); else if (!strncmp(this_opt, "mode:", 5)) - strlcpy(videomode, this_opt+5, sizeof(videomode)); + strscpy(videomode, this_opt + 5, sizeof(videomode)); else if (!strncmp(this_opt, "outputs:", 8)) - strlcpy(outputs, this_opt+8, sizeof(outputs)); + strscpy(outputs, this_opt + 8, sizeof(outputs)); else if (!strncmp(this_opt, "dfp:", 4)) { dfp_type = simple_strtoul(this_opt+4, NULL, 0); dfp = 1; @@ -2455,7 +2455,7 @@ static int __init matroxfb_setup(char *options) { else if (!strcmp(this_opt, "dfp")) dfp = value; else { - strlcpy(videomode, this_opt, sizeof(videomode)); + strscpy(videomode, this_opt, sizeof(videomode)); } } } diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c index afa688e754b9..5ccddcfce722 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c @@ -1331,7 +1331,7 @@ static void clear_fb_info(struct fb_info *fbi) { memset(&fbi->var, 0, sizeof(fbi->var)); memset(&fbi->fix, 0, sizeof(fbi->fix)); - strlcpy(fbi->fix.id, MODULE_NAME, sizeof(fbi->fix.id)); + strscpy(fbi->fix.id, MODULE_NAME, sizeof(fbi->fix.id)); } static int omapfb_free_all_fbmem(struct omapfb2_device *fbdev) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index e943300d23e8..d5d0bbd39213 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -640,7 +640,7 @@ static int pxa168fb_probe(struct platform_device *pdev) info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN; info->node = -1; - strlcpy(info->fix.id, mi->id, 16); + strscpy(info->fix.id, mi->id, 16); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; info->fix.xpanstep = 0; diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 66cfc3e9d3cf..696ac5431180 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2042,7 +2042,7 @@ static int __init pxafb_setup_options(void) return -ENODEV; if (options) - strlcpy(g_options, options, sizeof(g_options)); + strscpy(g_options, options, sizeof(g_options)); return 0; } diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index 5069f6f67923..67b63a753cb2 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -248,7 +248,7 @@ static int s3fb_setup_ddc_bus(struct fb_info *info) { struct s3fb_info *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index cf2a90ecd64e..e770b4a356b5 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -355,7 +355,7 @@ static int simplefb_regulators_get(struct simplefb_par *par, if (!p || p == prop->name) continue; - strlcpy(name, prop->name, + strscpy(name, prop->name, strlen(prop->name) - strlen(SUPPLY_SUFFIX) + 1); regulator = devm_regulator_get_optional(&pdev->dev, name); if (IS_ERR(regulator)) { diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index d29ded67cecb..c9e77429dfa3 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1872,7 +1872,7 @@ sisfb_get_fix(struct fb_fix_screeninfo *fix, int con, struct fb_info *info) memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - strlcpy(fix->id, ivideo->myid, sizeof(fix->id)); + strscpy(fix->id, ivideo->myid, sizeof(fix->id)); mutex_lock(&info->mm_lock); fix->smem_start = ivideo->video_base + ivideo->video_offset; @@ -5879,7 +5879,7 @@ static int sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ivideo->cardnumber++; } - strlcpy(ivideo->myid, chipinfo->chip_name, sizeof(ivideo->myid)); + strscpy(ivideo->myid, chipinfo->chip_name, sizeof(ivideo->myid)); ivideo->warncount = 0; ivideo->chip_id = pdev->device; diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c index 6a52eba64559..fce6cfbadfd6 100644 --- a/drivers/video/fbdev/sm501fb.c +++ b/drivers/video/fbdev/sm501fb.c @@ -1719,7 +1719,7 @@ static int sm501fb_init_fb(struct fb_info *fb, enum sm501_controller head, enable = 0; } - strlcpy(fb->fix.id, fbname, sizeof(fb->fix.id)); + strscpy(fb->fix.id, fbname, sizeof(fb->fix.id)); memcpy(&par->ops, (head == HEAD_CRT) ? &sm501fb_ops_crt : &sm501fb_ops_pnl, diff --git a/drivers/video/fbdev/sstfb.c b/drivers/video/fbdev/sstfb.c index 27d4b0ace2d6..cd4d640f9477 100644 --- a/drivers/video/fbdev/sstfb.c +++ b/drivers/video/fbdev/sstfb.c @@ -1382,7 +1382,7 @@ static int sstfb_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto fail; } sst_get_memsize(info, &fix->smem_len); - strlcpy(fix->id, spec->name, sizeof(fix->id)); + strscpy(fix->id, spec->name, sizeof(fix->id)); printk(KERN_INFO "%s (revision %d) with %s dac\n", fix->id, par->revision, par->dac_sw.name); diff --git a/drivers/video/fbdev/sunxvr1000.c b/drivers/video/fbdev/sunxvr1000.c index 15b079505a00..490bd9a14763 100644 --- a/drivers/video/fbdev/sunxvr1000.c +++ b/drivers/video/fbdev/sunxvr1000.c @@ -80,7 +80,7 @@ static int gfb_set_fbinfo(struct gfb_info *gp) info->pseudo_palette = gp->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "gfb", sizeof(info->fix.id)); + strscpy(info->fix.id, "gfb", sizeof(info->fix.id)); info->fix.smem_start = gp->fb_base_phys; info->fix.smem_len = gp->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/sunxvr2500.c b/drivers/video/fbdev/sunxvr2500.c index 1d3bacd9d5ac..1279b02234f8 100644 --- a/drivers/video/fbdev/sunxvr2500.c +++ b/drivers/video/fbdev/sunxvr2500.c @@ -84,7 +84,7 @@ static int s3d_set_fbinfo(struct s3d_info *sp) info->pseudo_palette = sp->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "s3d", sizeof(info->fix.id)); + strscpy(info->fix.id, "s3d", sizeof(info->fix.id)); info->fix.smem_start = sp->fb_base_phys; info->fix.smem_len = sp->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/sunxvr500.c b/drivers/video/fbdev/sunxvr500.c index 9daf17b11106..f7b463633ba0 100644 --- a/drivers/video/fbdev/sunxvr500.c +++ b/drivers/video/fbdev/sunxvr500.c @@ -207,7 +207,7 @@ static int e3d_set_fbinfo(struct e3d_info *ep) info->pseudo_palette = ep->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "e3d", sizeof(info->fix.id)); + strscpy(info->fix.id, "e3d", sizeof(info->fix.id)); info->fix.smem_start = ep->fb_base_phys; info->fix.smem_len = ep->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/tcx.c b/drivers/video/fbdev/tcx.c index 1638a40fed22..01d87f53324d 100644 --- a/drivers/video/fbdev/tcx.c +++ b/drivers/video/fbdev/tcx.c @@ -333,7 +333,7 @@ tcx_init_fix(struct fb_info *info, int linebytes) else tcx_name = "TCX24"; - strlcpy(info->fix.id, tcx_name, sizeof(info->fix.id)); + strscpy(info->fix.id, tcx_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_PSEUDOCOLOR; diff --git a/drivers/video/fbdev/tdfxfb.c b/drivers/video/fbdev/tdfxfb.c index 67e37a62b07c..8a8122f8bfeb 100644 --- a/drivers/video/fbdev/tdfxfb.c +++ b/drivers/video/fbdev/tdfxfb.c @@ -1264,7 +1264,7 @@ static int tdfxfb_setup_ddc_bus(struct tdfxfb_i2c_chan *chan, const char *name, { int rc; - strlcpy(chan->adapter.name, name, sizeof(chan->adapter.name)); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.class = I2C_CLASS_DDC; chan->adapter.algo_data = &chan->algo; @@ -1293,7 +1293,7 @@ static int tdfxfb_setup_i2c_bus(struct tdfxfb_i2c_chan *chan, const char *name, { int rc; - strlcpy(chan->adapter.name, name, sizeof(chan->adapter.name)); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = dev; diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c index ae0cf5540636..1fff5fd7ab51 100644 --- a/drivers/video/fbdev/tgafb.c +++ b/drivers/video/fbdev/tgafb.c @@ -1344,7 +1344,7 @@ tgafb_init_fix(struct fb_info *info) memory_size = 16777216; } - strlcpy(info->fix.id, tga_type_name, sizeof(info->fix.id)); + strscpy(info->fix.id, tga_type_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c index 319131bd72cf..cda095420ee8 100644 --- a/drivers/video/fbdev/tridentfb.c +++ b/drivers/video/fbdev/tridentfb.c @@ -270,7 +270,7 @@ static int tridentfb_setup_ddc_bus(struct fb_info *info) { struct tridentfb_par *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; -- cgit v1.2.3 From 9789de8bdc92a9ec95c9bc7b43e94de91acc4460 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:00 +0800 Subject: cifs: Use help macro to get the header preamble size It's better to use HEADER_PREAMBLE_SIZE because the unfolded expression too long. No actual functional changes, minor readability improvement. Signed-off-by: Zhang Xiaoxu Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 2 +- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 20 ++++++++++---------- fs/cifs/transport.c | 21 ++++++++++----------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 8f7835ccbca1..61a9fed56548 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -32,7 +32,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; - int is_smb2 = server->vals->header_preamble_size == 0; + bool is_smb2 = HEADER_PREAMBLE_SIZE(server) == 0; /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ if (is_smb2) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f15d7b0c123d..b15b84d03fb6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -557,6 +557,7 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) +#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3da5da9f16b0..ccf7f32e0c3e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -871,7 +871,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (HEADER_PREAMBLE_SIZE(server)) return 0; return le16_to_cpu(shdr->CreditRequest); @@ -1050,7 +1050,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* make sure this will fit in a large buffer */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - - server->vals->header_preamble_size) { + HEADER_PREAMBLE_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server, true); return -ECONNABORTED; @@ -1065,8 +1065,8 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* now read the rest */ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, - pdu_length - HEADER_SIZE(server) + 1 - + server->vals->header_preamble_size); + pdu_length - HEADER_SIZE(server) + 1 + + HEADER_PREAMBLE_SIZE(server)); if (length < 0) return length; @@ -1122,7 +1122,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (HEADER_PREAMBLE_SIZE(server)) return; if (shdr->CreditRequest) { @@ -1180,7 +1180,7 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (server->vals->header_preamble_size == 0) + if (HEADER_PREAMBLE_SIZE(server) == 0) server->total_read = 0; else server->total_read = length; @@ -1199,7 +1199,7 @@ next_pdu: /* make sure we have enough to get to the MID */ if (server->pdu_size < HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size) { + HEADER_PREAMBLE_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); cifs_reconnect(server, true); @@ -1208,9 +1208,9 @@ next_pdu: /* read down to the MID */ length = cifs_read_from_socket(server, - buf + server->vals->header_preamble_size, - HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size); + buf + HEADER_PREAMBLE_SIZE(server), + HEADER_SIZE(server) - 1 - + HEADER_PREAMBLE_SIZE(server)); if (length < 0) continue; server->total_read += length; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index de7aeced7e16..bb1052dbac5b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -261,8 +261,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) int nvec; unsigned long buflen = 0; - if (server->vals->header_preamble_size == 0 && - rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { + if (HEADER_PREAMBLE_SIZE(server) == 0 && rqst->rq_nvec >= 2 && + rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; } else { @@ -346,7 +346,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigprocmask(SIG_BLOCK, &mask, &oldmask); /* Generate a rfc1002 marker for SMB2+ */ - if (server->vals->header_preamble_size == 0) { + if (HEADER_PREAMBLE_SIZE(server) == 0) { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 @@ -1238,7 +1238,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, buf = (char *)midQ[i]->resp_buf; resp_iov[i].iov_base = buf; resp_iov[i].iov_len = midQ[i]->resp_buf_size + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (midQ[i]->large_buf) resp_buf_type[i] = CIFS_LARGE_BUFFER; @@ -1643,7 +1643,7 @@ int cifs_discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = server->pdu_size; - int remaining = rfclen + server->vals->header_preamble_size - + int remaining = rfclen + HEADER_PREAMBLE_SIZE(server) - server->total_read; while (remaining > 0) { @@ -1689,8 +1689,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int data_offset, data_len; struct cifs_readdata *rdata = mid->callback_data; char *buf = server->smallbuf; - unsigned int buflen = server->pdu_size + - server->vals->header_preamble_size; + unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); bool use_rdma_mr = false; cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", @@ -1724,10 +1723,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first two iov for signature check and to get credits */ rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = server->vals->header_preamble_size; - rdata->iov[1].iov_base = buf + server->vals->header_preamble_size; + rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server); + rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server); rdata->iov[1].iov_len = - server->total_read - server->vals->header_preamble_size; + server->total_read - HEADER_PREAMBLE_SIZE(server); cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", rdata->iov[0].iov_base, rdata->iov[0].iov_len); cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", @@ -1752,7 +1751,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } data_offset = server->ops->read_data_offset(buf) + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (data_offset < server->total_read) { /* * win2k8 sometimes sends an offset of 0 when the read -- cgit v1.2.3 From b6b3624d016b980f917b46e6b964f943ac5ac56e Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:01 +0800 Subject: cifs: Use help macro to get the mid header size It's better to use MID_HEADER_SIZE because the unfolded expression too long. No actual functional changes, minor readability improvement. Signed-off-by: Zhang Xiaoxu Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b15b84d03fb6..6c8293314530 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -558,6 +558,7 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) #define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) +#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ccf7f32e0c3e..0123f41c26f5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1065,8 +1065,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* now read the rest */ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, - pdu_length - HEADER_SIZE(server) + 1 + - HEADER_PREAMBLE_SIZE(server)); + pdu_length - MID_HEADER_SIZE(server)); if (length < 0) return length; @@ -1198,8 +1197,7 @@ next_pdu: server->pdu_size = pdu_length; /* make sure we have enough to get to the MID */ - if (server->pdu_size < HEADER_SIZE(server) - 1 - - HEADER_PREAMBLE_SIZE(server)) { + if (server->pdu_size < MID_HEADER_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); cifs_reconnect(server, true); @@ -1209,8 +1207,7 @@ next_pdu: /* read down to the MID */ length = cifs_read_from_socket(server, buf + HEADER_PREAMBLE_SIZE(server), - HEADER_SIZE(server) - 1 - - HEADER_PREAMBLE_SIZE(server)); + MID_HEADER_SIZE(server)); if (length < 0) continue; server->total_read += length; -- cgit v1.2.3 From d291e703f420d5f8f999fe54f360d54d213bddb4 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:02 +0800 Subject: cifs: Add helper function to check smb1+ server SMB1 server's header_preamble_size is not 0, add use is_smb1 function to simplify the code, no actual functional changes. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/connect.c | 10 +++++----- fs/cifs/transport.c | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 61a9fed56548..46f5718754f9 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -32,10 +32,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; - bool is_smb2 = HEADER_PREAMBLE_SIZE(server) == 0; /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ - if (is_smb2) { + if (!is_smb1(server)) { if (iov[0].iov_len <= 4) return -EIO; i = 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6c8293314530..ae7f571a7dba 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -752,6 +752,11 @@ struct TCP_Server_Info { #endif }; +static inline bool is_smb1(struct TCP_Server_Info *server) +{ + return HEADER_PREAMBLE_SIZE(server) != 0; +} + static inline void cifs_server_lock(struct TCP_Server_Info *server) { unsigned int nofs_flag = memalloc_nofs_save(); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0123f41c26f5..a0a06b6f252b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -871,7 +871,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (HEADER_PREAMBLE_SIZE(server)) + if (is_smb1(server)) return 0; return le16_to_cpu(shdr->CreditRequest); @@ -1121,7 +1121,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (HEADER_PREAMBLE_SIZE(server)) + if (is_smb1(server)) return; if (shdr->CreditRequest) { @@ -1179,10 +1179,10 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (HEADER_PREAMBLE_SIZE(server) == 0) - server->total_read = 0; - else + if (is_smb1(server)) server->total_read = length; + else + server->total_read = 0; /* * The right amount was read from socket - 4 bytes, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bb1052dbac5b..c2fe035e573b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -261,7 +261,7 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) int nvec; unsigned long buflen = 0; - if (HEADER_PREAMBLE_SIZE(server) == 0 && rqst->rq_nvec >= 2 && + if (!is_smb1(server) && rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; @@ -346,7 +346,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigprocmask(SIG_BLOCK, &mask, &oldmask); /* Generate a rfc1002 marker for SMB2+ */ - if (HEADER_PREAMBLE_SIZE(server) == 0) { + if (!is_smb1(server)) { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 -- cgit v1.2.3 From 60deb9f10eec5c6a20252ed36238b55d8b614a2c Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Sat, 20 Aug 2022 01:33:40 +0530 Subject: wifi: mac80211: Fix UAF in ieee80211_scan_rx() ieee80211_scan_rx() tries to access scan_req->flags after a null check, but a UAF is observed when the scan is completed and __ieee80211_scan_completed() executes, which then calls cfg80211_scan_done() leading to the freeing of scan_req. Since scan_req is rcu_dereference()'d, prevent the racing in __ieee80211_scan_completed() by ensuring that from mac80211's POV it is no longer accessed from an RCU read critical section before we call cfg80211_scan_done(). Cc: stable@vger.kernel.org Link: https://syzkaller.appspot.com/bug?extid=f9acff9bf08a845f225d Reported-by: syzbot+f9acff9bf08a845f225d@syzkaller.appspotmail.com Suggested-by: Johannes Berg Signed-off-by: Siddh Raman Pant Link: https://lore.kernel.org/r/20220819200340.34826-1-code@siddh.me Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fa8ddf576bc1..c4f2aeb31da3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -469,16 +469,19 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->mtx)); - if (scan_req != local->int_scan_req) { - local->scan_info.aborted = aborted; - cfg80211_scan_done(scan_req, &local->scan_info); - } RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; + synchronize_rcu(); + + if (scan_req != local->int_scan_req) { + local->scan_info.aborted = aborted; + cfg80211_scan_done(scan_req, &local->scan_info); + } + /* Set power back to normal operating levels. */ ieee80211_hw_config(local, 0); -- cgit v1.2.3 From 36fe8e4e5cb02131719612aea1e64379670d1846 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 23 Aug 2022 15:22:23 +0200 Subject: wifi: mac80211: always free sta in __sta_info_alloc in case of error Free sta pointer in __sta_info_alloc routine if sta_info_alloc_link() fails. Fixes: 246b39e4a1ba5 ("wifi: mac80211: refactor some sta_info link handling") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/a3d079208684cddbc25289f7f7e0fed795b0cad4.1661260857.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cb23da9aff1e..330dab41f2fe 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -494,7 +494,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; if (sta_info_alloc_link(local, &sta->deflink, gfp)) - return NULL; + goto free; if (link_id >= 0) { sta_info_add_link(sta, link_id, &sta->deflink, -- cgit v1.2.3 From 62b03f45c6352410d13bf0710d24ef6290632eb1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 18 Aug 2022 12:33:49 +0800 Subject: wifi: mac80211: fix possible leak in ieee80211_tx_control_port() Add missing dev_kfree_skb() in an error path in ieee80211_tx_control_port() to avoid a memory leak. Fixes: dd820ed6336a ("wifi: mac80211: return error from control port TX for drops") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220818043349.4168835-1-yangyingliang@huawei.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 45df9932d0ba..594bd70ee641 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5885,6 +5885,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); err = ieee80211_lookup_ra_sta(sdata, skb, &sta); if (err) { + dev_kfree_skb(skb); rcu_read_unlock(); return err; } -- cgit v1.2.3 From 15bc8966b6d3a5b9bfe4c9facfa02f2b69b1e5f0 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Sun, 14 Aug 2022 20:45:12 +0530 Subject: wifi: mac80211: Don't finalize CSA in IBSS mode if state is disconnected When we are not connected to a channel, sending channel "switch" announcement doesn't make any sense. The BSS list is empty in that case. This causes the for loop in cfg80211_get_bss() to be bypassed, so the function returns NULL (check line 1424 of net/wireless/scan.c), causing the WARN_ON() in ieee80211_ibss_csa_beacon() to get triggered (check line 500 of net/mac80211/ibss.c), which was consequently reported on the syzkaller dashboard. Thus, check if we have an existing connection before generating the CSA beacon in ieee80211_ibss_finish_csa(). Cc: stable@vger.kernel.org Fixes: cd7760e62c2a ("mac80211: add support for CSA in IBSS mode") Link: https://syzkaller.appspot.com/bug?id=05603ef4ae8926761b678d2939a3b2ad28ab9ca6 Reported-by: syzbot+b6c9fe29aefe68e4ad34@syzkaller.appspotmail.com Signed-off-by: Siddh Raman Pant Tested-by: syzbot+b6c9fe29aefe68e4ad34@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220814151512.9985-1-code@siddh.me Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d56890e3fabb..9b283bbc7bb4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -530,6 +530,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); + /* When not connected/joined, sending CSA doesn't make sense. */ + if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) + return -ENOLINK; + /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(sdata->local->hw.wiphy, -- cgit v1.2.3 From d776763f48084926b5d9e25507a3ddb7c9243d5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 10:03:21 +0300 Subject: wifi: cfg80211: debugfs: fix return type in ht40allow_map_read() The return type is supposed to be ssize_t, which is signed long, but "r" was declared as unsigned int. This means that on 64 bit systems we return positive values instead of negative error codes. Fixes: 80a3511d70e8 ("cfg80211: add debugfs HT40 allow map") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YutvOQeJm0UjLhwU@kili Signed-off-by: Johannes Berg --- net/wireless/debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index aab43469a2f0..0878b162890a 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -65,9 +65,10 @@ static ssize_t ht40allow_map_read(struct file *file, { struct wiphy *wiphy = file->private_data; char *buf; - unsigned int offset = 0, buf_size = PAGE_SIZE, i, r; + unsigned int offset = 0, buf_size = PAGE_SIZE, i; enum nl80211_band band; struct ieee80211_supported_band *sband; + ssize_t r; buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) -- cgit v1.2.3 From 55f0a4894484e8d6ddf662f5aebbf3b4cb028541 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jul 2022 17:25:16 +0300 Subject: wifi: mac80211: potential NULL dereference in ieee80211_tx_control_port() The ieee80211_lookup_ra_sta() function will sometimes set "sta" to NULL so add this NULL check to prevent an Oops. Fixes: 9dd1953846c7 ("wifi: nl80211/mac80211: clarify link ID in control port TX") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YuKcTAyO94YOy0Bu@kili Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 594bd70ee641..bf7fe6cd9dfc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5900,7 +5900,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, * for MLO STA, the SA should be the AP MLD address, but * the link ID has been selected already */ - if (sta->sta.mlo) + if (sta && sta->sta.mlo) memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN); } rcu_read_unlock(); -- cgit v1.2.3 From cd11d1a6114bd4bc6450ae59f6e110ec47362126 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 8 Jul 2022 08:40:09 +0100 Subject: HID: steam: Prevent NULL pointer dereference in steam_{recv,send}_report It is possible for a malicious device to forgo submitting a Feature Report. The HID Steam driver presently makes no prevision for this and de-references the 'struct hid_report' pointer obtained from the HID devices without first checking its validity. Let's change that. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Fixes: c164d6abf3841 ("HID: add driver for Valve Steam Controller") Signed-off-by: Lee Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index a3b151b29bd7..fc616db4231b 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -134,6 +134,11 @@ static int steam_recv_report(struct steam_device *steam, int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (!r) { + hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); + return -EINVAL; + } + if (hid_report_len(r) < 64) return -EINVAL; @@ -165,6 +170,11 @@ static int steam_send_report(struct steam_device *steam, int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (!r) { + hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); + return -EINVAL; + } + if (hid_report_len(r) < 64) return -EINVAL; -- cgit v1.2.3 From a5623a203cffe2d2b84d2f6c989d9017db1856af Mon Sep 17 00:00:00 2001 From: Karthik Alapati Date: Thu, 28 Jul 2022 21:13:17 +0530 Subject: HID: hidraw: fix memory leak in hidraw_release() Free the buffered reports before deleting the list entry. BUG: memory leak unreferenced object 0xffff88810e72f180 (size 32): comm "softirq", pid 0, jiffies 4294945143 (age 16.080s) hex dump (first 32 bytes): 64 f3 c6 6a d1 88 07 04 00 00 00 00 00 00 00 00 d..j............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemdup+0x23/0x50 mm/util.c:128 [] kmemdup include/linux/fortify-string.h:440 [inline] [] hidraw_report_event+0xa2/0x150 drivers/hid/hidraw.c:521 [] hid_report_raw_event+0x27d/0x740 drivers/hid/hid-core.c:1992 [] hid_input_report+0x1ae/0x270 drivers/hid/hid-core.c:2065 [] hid_irq_in+0x1ff/0x250 drivers/hid/usbhid/hid-core.c:284 [] __usb_hcd_giveback_urb+0xf9/0x230 drivers/usb/core/hcd.c:1670 [] usb_hcd_giveback_urb+0x1b6/0x1d0 drivers/usb/core/hcd.c:1747 [] dummy_timer+0x8e4/0x14c0 drivers/usb/gadget/udc/dummy_hcd.c:1988 [] call_timer_fn+0x38/0x200 kernel/time/timer.c:1474 [] expire_timers kernel/time/timer.c:1519 [inline] [] __run_timers.part.0+0x316/0x430 kernel/time/timer.c:1790 [] __run_timers kernel/time/timer.c:1768 [inline] [] run_timer_softirq+0x44/0x90 kernel/time/timer.c:1803 [] __do_softirq+0xe6/0x2ea kernel/softirq.c:571 [] invoke_softirq kernel/softirq.c:445 [inline] [] __irq_exit_rcu kernel/softirq.c:650 [inline] [] irq_exit_rcu+0xc0/0x110 kernel/softirq.c:662 [] sysvec_apic_timer_interrupt+0xa2/0xd0 arch/x86/kernel/apic/apic.c:1106 [] asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:649 [] native_safe_halt arch/x86/include/asm/irqflags.h:51 [inline] [] arch_safe_halt arch/x86/include/asm/irqflags.h:89 [inline] [] acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] [] acpi_idle_do_entry+0xc0/0xd0 drivers/acpi/processor_idle.c:554 Link: https://syzkaller.appspot.com/bug?id=19a04b43c75ed1092021010419b5e560a8172c4f Reported-by: syzbot+f59100a0428e6ded9443@syzkaller.appspotmail.com Signed-off-by: Karthik Alapati Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 681614a8302a..197b1e7bf029 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -350,6 +350,8 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); + for (int i = list->tail; i < list->head; i++) + kfree(list->buffer[i].value); list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); -- cgit v1.2.3 From 1c0cc9d11c665020cbeb80e660fb8929164407f4 Mon Sep 17 00:00:00 2001 From: Josh Kilmer Date: Thu, 28 Jul 2022 12:51:11 -0500 Subject: HID: asus: ROG NKey: Ignore portion of 0x5a report On an Asus G513QY, of the 5 bytes in a 0x5a report, only the first byte is a meaningful keycode. The other bytes are zeroed out or hold garbage from the last packet sent to the keyboard. This patch fixes up the report descriptor for this event so that the general hid code will only process 1 byte for keycodes, avoiding spurious key events and unmapped Asus vendor usagepage code warnings. Signed-off-by: Josh Kilmer Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 08c9a9a60ae4..b59c3dafa6a4 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1212,6 +1212,13 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc = new_rdesc; } + if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD && + *rsize == 331 && rdesc[190] == 0x85 && rdesc[191] == 0x5a && + rdesc[204] == 0x95 && rdesc[205] == 0x05) { + hid_info(hdev, "Fixing up Asus N-KEY keyb report descriptor\n"); + rdesc[205] = 0x01; + } + return rdesc; } -- cgit v1.2.3 From 94553f8a218540d676efbf3f7827ed493d1057cf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 4 Aug 2022 08:58:14 +0800 Subject: HID: ishtp-hid-clientHID: ishtp-hid-client: Fix comment typo The double `like' is duplicated in the comment, remove one. Signed-off-by: Jason Wang Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h index 6a5cc11aefd8..35dddc5015b3 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.h +++ b/drivers/hid/intel-ish-hid/ishtp-hid.h @@ -105,7 +105,7 @@ struct report_list { * @multi_packet_cnt: Count of fragmented packet count * * This structure is used to store completion flags and per client data like - * like report description, number of HID devices etc. + * report description, number of HID devices etc. */ struct ishtp_cl_data { /* completion flags */ -- cgit v1.2.3 From e1fa076706209cc447d7a2abd0843a18277e5ef7 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Thu, 4 Aug 2022 08:59:19 +0800 Subject: hid: intel-ish-hid: ishtp: Fix ishtp client sending disordered message There is a timing issue captured during ishtp client sending stress tests. It was observed during stress tests that ISH firmware is getting out of ordered messages. This is a rare scenario as the current set of ISH client drivers don't send much data to firmware. But this may not be the case going forward. When message size is bigger than IPC MTU, ishtp splits the message into fragments and uses serialized async method to send message fragments. The call stack: ishtp_cl_send_msg_ipc->ipc_tx_callback(first fregment)-> ishtp_send_msg(with callback)->write_ipc_to_queue-> write_ipc_from_queue->callback->ipc_tx_callback(next fregment)...... When an ipc write complete interrupt is received, driver also calls write_ipc_from_queue->ipc_tx_callback in ISR to start sending of next fragment. Through ipc_tx_callback uses spin_lock to protect message splitting, as the serialized sending method will call back to ipc_tx_callback again, so it doesn't put sending under spin_lock, it causes driver cannot guarantee all fragments be sent in order. Considering this scenario: ipc_tx_callback just finished a fragment splitting, and not call ishtp_send_msg yet, there is a write complete interrupt happens, then ISR->write_ipc_from_queue ->ipc_tx_callback->ishtp_send_msg->write_ipc_to_queue...... Because ISR has higher exec priority than normal thread, this causes the new fragment be sent out before previous fragment. This disordered message causes invalid message to firmware. The solution is, to send fragments synchronously: Use ishtp_write_message writing fragments into tx queue directly one by one, instead of ishtp_send_msg only writing one fragment with completion callback. As no completion callback be used, so change ipc_tx_callback to ipc_tx_send. Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/client.c | 68 ++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c index 405e0d5212cc..df0a825694f5 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client.c +++ b/drivers/hid/intel-ish-hid/ishtp/client.c @@ -626,13 +626,14 @@ static void ishtp_cl_read_complete(struct ishtp_cl_rb *rb) } /** - * ipc_tx_callback() - IPC tx callback function + * ipc_tx_send() - IPC tx send function * @prm: Pointer to client device instance * - * Send message over IPC either first time or on callback on previous message - * completion + * Send message over IPC. Message will be split into fragments + * if message size is bigger than IPC FIFO size, and all + * fragments will be sent one by one. */ -static void ipc_tx_callback(void *prm) +static void ipc_tx_send(void *prm) { struct ishtp_cl *cl = prm; struct ishtp_cl_tx_ring *cl_msg; @@ -677,32 +678,41 @@ static void ipc_tx_callback(void *prm) list); rem = cl_msg->send_buf.size - cl->tx_offs; - ishtp_hdr.host_addr = cl->host_client_id; - ishtp_hdr.fw_addr = cl->fw_client_id; - ishtp_hdr.reserved = 0; - pmsg = cl_msg->send_buf.data + cl->tx_offs; + while (rem > 0) { + ishtp_hdr.host_addr = cl->host_client_id; + ishtp_hdr.fw_addr = cl->fw_client_id; + ishtp_hdr.reserved = 0; + pmsg = cl_msg->send_buf.data + cl->tx_offs; + + if (rem <= dev->mtu) { + /* Last fragment or only one packet */ + ishtp_hdr.length = rem; + ishtp_hdr.msg_complete = 1; + /* Submit to IPC queue with no callback */ + ishtp_write_message(dev, &ishtp_hdr, pmsg); + cl->tx_offs = 0; + cl->sending = 0; - if (rem <= dev->mtu) { - ishtp_hdr.length = rem; - ishtp_hdr.msg_complete = 1; - cl->sending = 0; - list_del_init(&cl_msg->list); /* Must be before write */ - spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); - /* Submit to IPC queue with no callback */ - ishtp_write_message(dev, &ishtp_hdr, pmsg); - spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); - list_add_tail(&cl_msg->list, &cl->tx_free_list.list); - ++cl->tx_ring_free_size; - spin_unlock_irqrestore(&cl->tx_free_list_spinlock, - tx_free_flags); - } else { - /* Send IPC fragment */ - spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); - cl->tx_offs += dev->mtu; - ishtp_hdr.length = dev->mtu; - ishtp_hdr.msg_complete = 0; - ishtp_send_msg(dev, &ishtp_hdr, pmsg, ipc_tx_callback, cl); + break; + } else { + /* Send ipc fragment */ + ishtp_hdr.length = dev->mtu; + ishtp_hdr.msg_complete = 0; + /* All fregments submitted to IPC queue with no callback */ + ishtp_write_message(dev, &ishtp_hdr, pmsg); + cl->tx_offs += dev->mtu; + rem = cl_msg->send_buf.size - cl->tx_offs; + } } + + list_del_init(&cl_msg->list); + spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); + + spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); + list_add_tail(&cl_msg->list, &cl->tx_free_list.list); + ++cl->tx_ring_free_size; + spin_unlock_irqrestore(&cl->tx_free_list_spinlock, + tx_free_flags); } /** @@ -720,7 +730,7 @@ static void ishtp_cl_send_msg_ipc(struct ishtp_device *dev, return; cl->tx_offs = 0; - ipc_tx_callback(cl); + ipc_tx_send(cl); ++cl->send_msg_cnt_ipc; } -- cgit v1.2.3 From d9a17651f3749e69890db57ca66e677dfee70829 Mon Sep 17 00:00:00 2001 From: Michael Hübner Date: Fri, 5 Aug 2022 10:05:23 +0200 Subject: HID: thrustmaster: Add sparco wheel and fix array length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device id for the Sparco R383 Mod wheel. Fix wheel info array length to match actual wheel count present in the array. Signed-off-by: Michael Hübner Signed-off-by: Jiri Kosina --- drivers/hid/hid-thrustmaster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index c3e6d69fdfbd..cf1679b0d4fb 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -67,12 +67,13 @@ static const struct tm_wheel_info tm_wheels_infos[] = { {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"}, {0x0206, 0x0005, "Thrustmaster T300RS"}, {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"}, + {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"}, {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"}, {0x0002, 0x0002, "Thrustmaster T500RS"} //{0x0407, 0x0001, "Thrustmaster TMX"} }; -static const uint8_t tm_wheels_infos_length = 4; +static const uint8_t tm_wheels_infos_length = 7; /* * This structs contains (in little endian) the response data -- cgit v1.2.3 From adada3f4930ac084740ea340bd8e94028eba4f22 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 16 Aug 2022 19:21:20 +0900 Subject: HID: AMD_SFH: Add a DMI quirk entry for Chromebooks Google Chromebooks use Chrome OS Embedded Controller Sensor Hub instead of Sensor Hub Fusion and leaves MP2 uninitialized, which disables all functionalities, even including the registers necessary for feature detections. The behavior was observed with Lenovo ThinkPad C13 Yoga. Signed-off-by: Akihiko Odaki Suggested-by: Mario Limonciello Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 4b90c86ee5f8..47774b9ab3de 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -288,11 +288,29 @@ int amd_sfh_irq_init(struct amd_mp2_dev *privdata) return 0; } +static const struct dmi_system_id dmi_nodevs[] = { + { + /* + * Google Chromebooks use Chrome OS Embedded Controller Sensor + * Hub instead of Sensor Hub Fusion and leaves MP2 + * uninitialized, which disables all functionalities, even + * including the registers necessary for feature detections. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + }, + }, + { } +}; + static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct amd_mp2_dev *privdata; int rc; + if (dmi_first_match(dmi_nodevs)) + return -ENODEV; + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); if (!privdata) return -ENOMEM; -- cgit v1.2.3 From 3a47fa7b14c7d9613909a844aba27f99d3c58634 Mon Sep 17 00:00:00 2001 From: Steev Klimaszewski Date: Thu, 18 Aug 2022 21:39:24 -0500 Subject: HID: add Lenovo Yoga C630 battery quirk Similar to the Surface Go devices, the Elantech touchscreen/digitizer in the Lenovo Yoga C630 mistakenly reports the battery of the stylus, and always reports an empty battery. Apply the HID_BATTERY_QUIRK_IGNORE quirk to ignore this battery and prevent the erroneous low battery warnings. Signed-off-by: Steev Klimaszewski Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0fb720a96399..347d783da82c 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -414,6 +414,7 @@ #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A #define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C +#define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 48c1c02c69f4..17a453bb09a2 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -383,6 +383,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- cgit v1.2.3 From 750ec977288d96e9a11424e3507ede097af732c4 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Sun, 21 Aug 2022 08:04:45 +0000 Subject: HID: Add Apple Touchbar on T2 Macs in hid_have_special_driver list The touchbar on Apple T2 Macs has 2 modes, one that shows the function keys and other that shows the media controls. The user can use the fn key on his keyboard to switch between the 2 modes. On Linux, if people were using an external keyboard or mouse, the touchbar failed to change modes on pressing the fn key with the following in dmesg :- [ 10.661445] apple-ib-als 0003:05AC:8262.0001: : USB HID v1.01 Device [Apple Inc. Ambient Light Sensor] on usb-bce-vhci-3/input0 [ 11.830992] apple-ib-touchbar 0003:05AC:8302.0007: input: USB HID v1.01 Keyboard [Apple Inc. Touch Bar Display] on usb-bce-vhci-6/input0 [ 12.139407] apple-ib-touchbar 0003:05AC:8102.0008: : USB HID v1.01 Device [Apple Inc. Touch Bar Backlight] on usb-bce-vhci-7/input0 [ 12.211824] apple-ib-touchbar 0003:05AC:8102.0009: : USB HID v1.01 Device [Apple Inc. Touch Bar Backlight] on usb-bce-vhci-7/input1 [ 14.219759] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 24.395670] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 34.635791] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 269.579233] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 1 (-110) Add the USB IDs of the touchbar found in T2 Macs to HID have special driver list to fix the issue. Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-quirks.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 347d783da82c..f80d6193fca6 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -185,6 +185,8 @@ #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f +#define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 +#define USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY 0x8302 #define USB_VENDOR_ID_ASUS 0x0486 #define USB_DEVICE_ID_ASUS_T91MT 0x0185 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index dc67717d2dab..70f602c64fd1 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -314,6 +314,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, #endif #if IS_ENABLED(CONFIG_HID_APPLEIR) { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) }, -- cgit v1.2.3 From 8db8be9cfc89935c97d791c7e6264e710a7e8a56 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 22 Aug 2022 08:22:47 +0200 Subject: HID: input: fix uclogic tablets commit 87562fcd1342 ("HID: input: remove the need for HID_QUIRK_INVERT") made the assumption that it was the only one handling tablets and thus kept an internal state regarding the tool. Turns out that the uclogic driver has a timer to release the in range bit, effectively making hid-input ignoring all in range information after the very first one. Fix that by having a more rationale approach which consists in forwarding every event and let the input stack filter out the duplicates. Reported-by: Stefan Hansson Fixes: 87562fcd1342 ("HID: input: remove the need for HID_QUIRK_INVERT") Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 17a453bb09a2..859aeb07542e 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1534,7 +1534,10 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct * assume ours */ if (!report->tool) - hid_report_set_tool(report, input, usage->code); + report->tool = usage->code; + + /* drivers may have changed the value behind our back, resend it */ + hid_report_set_tool(report, input, report->tool); } else { hid_report_release_tool(report, input, usage->code); } -- cgit v1.2.3 From 06865077b34c792181e3c4f82417a2888c6e7453 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 12 Aug 2022 14:06:45 +0100 Subject: ACPI: property: Fix type detection of unified integer reading functions The current code expects the type of the value to be an integer type, instead the value passed to the macro is a pointer. Ensure the size comparison uses the correct pointer type to choose the max value, instead of using the integer type. Fixes: 923044133367 ("ACPI: property: Unify integer value reading functions") Signed-off-by: Stefan Binding Reviewed-by: Sakari Ailus Tested-by: Sakari Ailus Tested-by: John Garry Acked-by: Ard Biesheuvel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 7b3ad8ed2f4e..b1d4a8db89df 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1043,10 +1043,10 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, break; \ } \ if (__items[i].integer.value > _Generic(__val, \ - u8: U8_MAX, \ - u16: U16_MAX, \ - u32: U32_MAX, \ - u64: U64_MAX, \ + u8 *: U8_MAX, \ + u16 *: U16_MAX, \ + u32 *: U32_MAX, \ + u64 *: U64_MAX, \ default: 0U)) { \ ret = -EOVERFLOW; \ break; \ -- cgit v1.2.3 From 2ea3b19792dbe32287b0c48f3ff7e866f61967c7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 24 Aug 2022 14:59:56 +0300 Subject: ACPI: property: Ignore already existing data node tags ACPI node pointers are attached to data node handles, in order to resolve string references to them. _DSD guide allows the same node to be reached from multiple parent nodes, leading the node enumeration algorithm to each such nodes more than once. As attached data already already exists, attaching data with the same tag will fail. Address this problem by ignoring nodes that have been already tagged. Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles") Reported-by: Pierre-Louis Bossart Signed-off-by: Sakari Ailus Tested-by: Pierre-Louis Bossart Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index b1d4a8db89df..91d0e75859d3 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -370,7 +370,7 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data) bool ret; status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { acpi_handle_err(dn->handle, "Can't tag data node\n"); return false; } -- cgit v1.2.3 From bd9594ae4c55351f134620ab890259820abf4c43 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 25 Aug 2022 14:17:15 +0300 Subject: ACPI: property: Remove default association from integer maximum values Remove the default association from integer maximum value checks. It is not necessary and has caused a bug in other associations being unnoticed. Fixes: 923044133367 ("ACPI: property: Unify integer value reading functions") Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 91d0e75859d3..d4c168ce428c 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1046,8 +1046,7 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, u8 *: U8_MAX, \ u16 *: U16_MAX, \ u32 *: U32_MAX, \ - u64 *: U64_MAX, \ - default: 0U)) { \ + u64 *: U64_MAX)) { \ ret = -EOVERFLOW; \ break; \ } \ -- cgit v1.2.3 From 7c12bb8f59d36507bf70c35dae083dfe9cafc815 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: Select PCI_QUIRKS to avoid build error PCI_LOONGSON is a mandatory for LoongArch and it is selected in Kconfig unconditionally, but its dependency PCI_QUIRKS is missing and may cause a build error when "make randconfig": arch/loongarch/pci/acpi.c: In function 'pci_acpi_setup_ecam_mapping': >> arch/loongarch/pci/acpi.c:103:29: error: 'loongson_pci_ecam_ops' undeclared (first use in this function) 103 | ecam_ops = &loongson_pci_ecam_ops; | ^~~~~~~~~~~~~~~~~~~~~ arch/loongarch/pci/acpi.c:103:29: note: each undeclared identifier is reported only once for each function it appears in Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for PCI_LOONGSON Depends on [n]: PCI [=y] && (MACH_LOONGSON64 [=y] || COMPILE_TEST [=y]) && (OF [=y] || ACPI [=y]) && PCI_QUIRKS [=n] Selected by [y]: - LOONGARCH [=y] Fix it by selecting PCI_QUIRKS unconditionally, too. Reported-by: kernel test robot Tested-by: Randy Dunlap Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4abc9a28aba4..26aeb1408e56 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -111,6 +111,7 @@ config LOONGARCH select PCI_ECAM if ACPI select PCI_LOONGSON select PCI_MSI_ARCH_FALLBACKS + select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB select SMP -- cgit v1.2.3 From 84e762060147582912581fd99cb25f3559ec8c22 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: Fix build warnings in VDSO Fix build warnings in VDSO as below: arch/loongarch/vdso/vgettimeofday.c:9:5: warning: no previous prototype for '__vdso_clock_gettime' [-Wmissing-prototypes] 9 | int __vdso_clock_gettime(clockid_t clock, | ^~~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgettimeofday.c:15:5: warning: no previous prototype for '__vdso_gettimeofday' [-Wmissing-prototypes] 15 | int __vdso_gettimeofday(struct __kernel_old_timeval *tv, | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgettimeofday.c:21:5: warning: no previous prototype for '__vdso_clock_getres' [-Wmissing-prototypes] 21 | int __vdso_clock_getres(clockid_t clock_id, | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgetcpu.c:27:5: warning: no previous prototype for '__vdso_getcpu' [-Wmissing-prototypes] 27 | int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) Reported-by: kernel test robot Signed-off-by: Huacai Chen --- arch/loongarch/vdso/vgetcpu.c | 2 ++ arch/loongarch/vdso/vgettimeofday.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index 43a0078e4418..e02e775f5360 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -24,6 +24,8 @@ static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); } +extern +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) { int cpu_id; diff --git a/arch/loongarch/vdso/vgettimeofday.c b/arch/loongarch/vdso/vgettimeofday.c index b1f4548dae92..8f22863bd7ea 100644 --- a/arch/loongarch/vdso/vgettimeofday.c +++ b/arch/loongarch/vdso/vgettimeofday.c @@ -6,20 +6,23 @@ */ #include -int __vdso_clock_gettime(clockid_t clock, - struct __kernel_timespec *ts) +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } -int __vdso_gettimeofday(struct __kernel_old_timeval *tv, - struct timezone *tz) +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } -int __vdso_clock_getres(clockid_t clock_id, - struct __kernel_timespec *res) +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); } -- cgit v1.2.3 From da48b67cfb6b4f115ae652dd5995c56fe2a2cf9b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: Cleanup reset routines with new API Cleanup reset routines by using new do_kernel_power_off() instead of old pm_power_off(), and then simplify the whole file (reset.c) organization by inlining some functions. This cleanup also fix a poweroff error if EFI runtime is disabled. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/reboot.h | 10 ------ arch/loongarch/kernel/reset.c | 69 +++++++++++-------------------------- 2 files changed, 21 insertions(+), 58 deletions(-) delete mode 100644 arch/loongarch/include/asm/reboot.h diff --git a/arch/loongarch/include/asm/reboot.h b/arch/loongarch/include/asm/reboot.h deleted file mode 100644 index 51151749d8f0..000000000000 --- a/arch/loongarch/include/asm/reboot.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_REBOOT_H -#define _ASM_REBOOT_H - -extern void (*pm_restart)(void); - -#endif /* _ASM_REBOOT_H */ diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 800c965a17ea..8c82021eb2f4 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -15,10 +15,16 @@ #include #include #include -#include -static void default_halt(void) +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void machine_halt(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif local_irq_disable(); clear_csr_ecfg(ECFG0_IM); @@ -30,18 +36,29 @@ static void default_halt(void) } } -static void default_poweroff(void) +void machine_power_off(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_power_off(); #ifdef CONFIG_EFI efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); #endif + while (true) { __arch_cpu_idle(); } } -static void default_restart(void) +void machine_restart(char *command) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_restart(command); #ifdef CONFIG_EFI if (efi_capsule_pending(NULL)) efi_reboot(REBOOT_WARM, NULL); @@ -55,47 +72,3 @@ static void default_restart(void) __arch_cpu_idle(); } } - -void (*pm_restart)(void); -EXPORT_SYMBOL(pm_restart); - -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void machine_halt(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - default_halt(); -} - -void machine_power_off(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - pm_power_off(); -} - -void machine_restart(char *command) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - do_kernel_restart(command); - pm_restart(); -} - -static int __init loongarch_reboot_setup(void) -{ - pm_restart = default_restart; - pm_power_off = default_poweroff; - - return 0; -} - -arch_initcall(loongarch_reboot_setup); -- cgit v1.2.3 From 092e9ebe52a664f9f58e2d24136ae791fe71c6db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: Cleanup headers to avoid circular dependency When enable GENERIC_IOREMAP, there will be circular dependency to cause build errors. The root cause is that pgtable.h shouldn't include io.h but pgtable.h need some macros defined in io.h. So cleanup those macros and remove the unnecessary inclusions, as other architectures do. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/addrspace.h | 16 ++++++++++++++++ arch/loongarch/include/asm/io.h | 19 ------------------- arch/loongarch/include/asm/page.h | 2 +- arch/loongarch/include/asm/pgtable.h | 7 +++---- arch/loongarch/mm/mmap.c | 11 ++--------- 5 files changed, 22 insertions(+), 33 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b91e0733b2e5..d342935e5a72 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -109,4 +109,20 @@ extern unsigned long vm_map_base; */ #define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) +/* + * On LoongArch, I/O ports mappring is following: + * + * | .... | + * |-----------------------| + * | pci io ports(16K~32M) | + * |-----------------------| + * | isa io ports(0 ~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + #endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 884599739b36..999944ea1cea 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -7,34 +7,15 @@ #define ARCH_HAS_IOREMAP_WC -#include #include #include #include -#include -#include #include #include #include #include -/* - * On LoongArch, I/O ports mappring is following: - * - * | .... | - * |-----------------------| - * | pci io ports(64K~32M) | - * |-----------------------| - * | isa io ports(0 ~16K) | - * PCI_IOBASE ->|-----------------------| - * | .... | - */ -#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) -#define PCI_IOSIZE SZ_32M -#define ISA_IOSIZE SZ_16K -#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) - /* * Change "struct page" to physical address. */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index a37324ac460b..53f284a96182 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -95,7 +95,7 @@ static inline int pfn_valid(unsigned long pfn) #endif -#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr))) +#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) extern int __virt_addr_valid(volatile void *kaddr); diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index e03443abaf7d..8ea57e2f0e04 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -59,7 +59,6 @@ #include #include #include -#include struct mm_struct; struct vm_area_struct; @@ -145,7 +144,7 @@ static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) *p4d = p4dval; } -#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) #define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) #endif @@ -188,7 +187,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) -#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) #endif @@ -221,7 +220,7 @@ static inline void pmd_clear(pmd_t *pmdp) #define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) -#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd)) +#define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 52e40f0ba732..381a569635a9 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -2,16 +2,9 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -120,6 +113,6 @@ int __virt_addr_valid(volatile void *kaddr) if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; - return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); + return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); -- cgit v1.2.3 From 720dc7ab252bbdf404cab7b909e26b31e602bf7e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: Add subword xchg/cmpxchg emulation LoongArch only support 32-bit/64-bit xchg/cmpxchg in native. But percpu operation, qspinlock and some drivers need 8-bit/16-bit xchg/cmpxchg. We add subword xchg/cmpxchg emulation in this patch because the emulation has better performance than the generic implementation (on NUMA system), and it can fix some build errors meanwhile [1]. LoongArch's guarantee for forward progress (avoid many ll/sc happening at the same time and no one succeeds): We have the "exclusive access (with timeout) of ll" feature to avoid simultaneous ll (which also blocks other memory load/store on the same address), and the "random delay of sc" feature to avoid simultaneous sc. It is a mandatory requirement for multi-core LoongArch processors to implement such features, only except those single-core and dual-core processors (they also don't support multi-chip interconnection). Feature bits are introduced in CPUCFG3, bit 3 and bit 4 [2]. [1] https://lore.kernel.org/loongarch/CAAhV-H6vvkuOzy8OemWdYK3taj5Jn3bFX0ZTwE=twM8ywpBUYA@mail.gmail.com/T/#t [2] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg Reported-by: Sudip Mukherjee (Codethink) Suggested-by: Linus Torvalds Signed-off-by: Rui Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cmpxchg.h | 98 +++++++++++++++++++++++++++++++++++- arch/loongarch/include/asm/percpu.h | 8 +++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 0a9b0fac1eee..ae19e33c7754 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -5,8 +5,9 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H -#include +#include #include +#include #define __xchg_asm(amswap_db, m, val) \ ({ \ @@ -21,10 +22,53 @@ __ret; \ }) +static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val, + unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask value to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + val &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * exchange within the naturally aligned 4 byte integerthat includes + * it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z5 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (val << shift) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { + case 1: + case 2: + return __xchg_small(ptr, x, size); + case 4: return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x); @@ -67,10 +111,62 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, __ret; \ }) +static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old, + unsigned int new, unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask inputs to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + old &= mask; + new &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * compare & exchange within the naturally aligned 4 byte integer + * that includes it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + old <<= shift; + new <<= shift; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %z4 \n" + " bne %1, %z5, 2f \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z6 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + " b 3f \n" + "2: \n" + __WEAK_LLSC_MB + "3: \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + case 2: + return __cmpxchg_small(ptr, old, new, size); + case 4: return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr, (u32)old, new); diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index e6569f18c6dd..0bd6b0110198 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -123,6 +123,10 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { + case 1: + case 2: + return __xchg_small((volatile void *)ptr, val, size); + case 4: return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val); @@ -204,9 +208,13 @@ do { \ #define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) #define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -- cgit v1.2.3 From b83699ea1e62951857c2d8648bd93a4744899eb7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: LoongArch: mm: Avoid unnecessary page fault retires on shared memory types Commit d92725256b4f22d0 ("mm: avoid unnecessary page fault retires on shared memory types") modifies do_page_fault() to handle the VM_FAULT_ COMPLETED case, but forget to change for LoongArch, so fix it as other architectures does. Fixes: d92725256b4f22d0 ("mm: avoid unnecessary page fault retires on shared memory types") Reviewed-by: Guo Ren Signed-off-by: Huacai Chen --- arch/loongarch/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 605579b19a00..1ccd53655cab 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -216,6 +216,10 @@ good_area: return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; -- cgit v1.2.3 From ab0af755d40f2ea4ef01503e7bebf083f65c9062 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 17 Aug 2022 06:43:33 +0200 Subject: xen: x86: remove setting the obsolete config XEN_MAX_DOMAIN_MEMORY Commit c70727a5bc18 ("xen: allow more than 512 GB of RAM for 64 bit pv-domains") from July 2015 replaces the config XEN_MAX_DOMAIN_MEMORY with a new config XEN_512GB, but misses to adjust arch/x86/configs/xen.config. As XEN_512GB defaults to yes, there is no need to explicitly set any config in xen.config. Just remove setting the obsolete config XEN_MAX_DOMAIN_MEMORY. Signed-off-by: Lukas Bulwahn Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220817044333.22310-1-lukas.bulwahn@gmail.com Signed-off-by: Juergen Gross --- arch/x86/configs/xen.config | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config index d9fc7139fd46..581296255b39 100644 --- a/arch/x86/configs/xen.config +++ b/arch/x86/configs/xen.config @@ -14,7 +14,6 @@ CONFIG_CPU_FREQ=y # x86 xen specific config options CONFIG_XEN_PVH=y -CONFIG_XEN_MAX_DOMAIN_MEMORY=500 CONFIG_XEN_SAVE_RESTORE=y # CONFIG_XEN_DEBUG_FS is not set CONFIG_XEN_MCE_LOG=y -- cgit v1.2.3 From 6bb79f5b4c84a09cd6bed46ef3e46ef4fc21407d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:22 +0200 Subject: xen: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20220818210122.7613-1-wsa+renesas@sang-engineering.com Signed-off-by: Juergen Gross --- drivers/xen/xen-scsiback.c | 2 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 7a0c93acc2c5..d3dcda344989 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1121,7 +1121,7 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, "%s: writing %s", __func__, state); return; } - strlcpy(phy, val, VSCSI_NAMELEN); + strscpy(phy, val, VSCSI_NAMELEN); kfree(val); /* virtual SCSI device */ diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 07b010a68fcf..f44d5a64351e 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -40,7 +40,7 @@ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) return -EINVAL; } - strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + strscpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); if (!strchr(bus_id, '/')) { pr_warn("bus_id %s no slash\n", bus_id); return -EINVAL; -- cgit v1.2.3 From 467249a7dff68451868ca79696aef69764193a8a Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 23 Aug 2022 09:10:59 +0800 Subject: HID: intel-ish-hid: ipc: Add Meteor Lake PCI device ID Add device ID of Meteor Lake P into ishtp support list. Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1 + drivers/hid/intel-ish-hid/ipc/pci-ish.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index e600dbf04dfc..fc108f19a64c 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -32,6 +32,7 @@ #define ADL_P_DEVICE_ID 0x51FC #define ADL_N_DEVICE_ID 0x54FC #define RPL_S_DEVICE_ID 0x7A78 +#define MTL_P_DEVICE_ID 0x7E45 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 2c67ec17bec6..7120b30ac51d 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -43,6 +43,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_P_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_N_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, RPL_S_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MTL_P_DEVICE_ID)}, {0, } }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); -- cgit v1.2.3 From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 16 Aug 2022 11:54:07 -0400 Subject: s390: fix double free of GS and RI CBs on fork() failure The pointers for guarded storage and runtime instrumentation control blocks are stored in the thread_struct of the associated task. These pointers are initially copied on fork() via arch_dup_task_struct() and then cleared via copy_thread() before fork() returns. If fork() happens to fail after the initial task dup and before copy_thread(), the newly allocated task and associated thread_struct memory are freed via free_task() -> arch_release_task_struct(). This results in a double free of the guarded storage and runtime info structs because the fields in the failed task still refer to memory associated with the source task. This problem can manifest as a BUG_ON() in set_freepointer() (with CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled) when running trinity syscall fuzz tests on s390x. To avoid this problem, clear the associated pointer fields in arch_dup_task_struct() immediately after the new task is copied. Note that the RI flag is still cleared in copy_thread() because it resides in thread stack memory and that is where stack info is copied. Signed-off-by: Brian Foster Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling") Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling") Cc: # 4.15 Reviewed-by: Gerald Schaefer Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com Signed-off-by: Vasily Gorbik --- arch/s390/kernel/process.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 89949b9f3cf8..d5119e039d85 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) memcpy(dst, src, arch_task_struct_size); dst->thread.fpu.regs = dst->thread.fpu.fprs; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + return 0; } @@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; - - /* Don't copy runtime instrumentation info */ - p->thread.ri_cb = NULL; + /* + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). + */ frame->childregs.psw.mask &= ~PSW_MASK_RI; - /* Don't copy guarded storage control block */ - p->thread.gs_cb = NULL; - p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { -- cgit v1.2.3 From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 17 Aug 2022 15:26:03 +0200 Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE For non-protection pXd_none() page faults in do_dat_exception(), we call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC). In do_exception(), vma->vm_flags is checked against that before calling handle_mm_fault(). Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"), we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that it was a write access. However, the vma flags check is still only checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma does not allow VM_WRITE. Fix this by changing access check in do_exception() to VM_WRITE only, when recognizing write access. Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization") Cc: Reported-by: David Hildenbrand Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 13449941516c..09b6e756d521 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags = FAULT_FLAG_DEFAULT; if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (access == VM_WRITE || is_write) + if (is_write) + access = VM_WRITE; + if (access == VM_WRITE) flags |= FAULT_FLAG_WRITE; mmap_read_lock(mm); -- cgit v1.2.3 From 1ff89e06c2e5fab30274e4b02360d4241d6e605e Mon Sep 17 00:00:00 2001 From: "Daniel J. Ogorchock" Date: Wed, 13 Jul 2022 16:20:59 -0400 Subject: HID: nintendo: fix rumble worker null pointer deref We can dereference a null pointer trying to queue work to a destroyed workqueue. If the device is disconnected, nintendo_hid_remove is called, in which the rumble_queue is destroyed. Avoid using that queue to defer rumble work once the controller state is set to JOYCON_CTLR_STATE_REMOVED. This eliminates the null pointer dereference. Signed-off-by: Daniel J. Ogorchock Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 92ac4f605f13..6028af3c3aae 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1221,6 +1221,7 @@ static void joycon_parse_report(struct joycon_ctlr *ctlr, spin_lock_irqsave(&ctlr->lock, flags); if (IS_ENABLED(CONFIG_NINTENDO_FF) && rep->vibrator_report && + ctlr->ctlr_state != JOYCON_CTLR_STATE_REMOVED && (msecs - ctlr->rumble_msecs) >= JC_RUMBLE_PERIOD_MS && (ctlr->rumble_queue_head != ctlr->rumble_queue_tail || ctlr->rumble_zero_countdown > 0)) { @@ -1545,12 +1546,13 @@ static int joycon_set_rumble(struct joycon_ctlr *ctlr, u16 amp_r, u16 amp_l, ctlr->rumble_queue_head = 0; memcpy(ctlr->rumble_data[ctlr->rumble_queue_head], data, JC_RUMBLE_DATA_SIZE); - spin_unlock_irqrestore(&ctlr->lock, flags); /* don't wait for the periodic send (reduces latency) */ - if (schedule_now) + if (schedule_now && ctlr->ctlr_state != JOYCON_CTLR_STATE_REMOVED) queue_work(ctlr->rumble_queue, &ctlr->rumble_worker); + spin_unlock_irqrestore(&ctlr->lock, flags); + return 0; } -- cgit v1.2.3 From 581711c46612c1fd7f98960f9ad53f04fdb89853 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:43 +0100 Subject: io_uring/net: save address for sendzc async execution We usually copy all bits that a request needs from the userspace for async execution, so the userspace can keep them on the stack. However, send zerocopy violates this pattern for addresses and may reloads it e.g. from io-wq. Save the address if any in ->async_data as usual. Reported-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d7512d7aa9abcd36e9afe1a4d292a24cb2d157e5.1661342812.git.asml.silence@gmail.com [axboe: fold in incremental fix] Signed-off-by: Jens Axboe --- io_uring/net.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- io_uring/net.h | 1 + io_uring/opdef.c | 4 +++- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 3adcb09ae264..0af8a02df580 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -182,6 +182,37 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, &iomsg->free_iov); } +int io_sendzc_prep_async(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + struct io_async_msghdr *io; + int ret; + + if (!zc->addr || req_has_async_data(req)) + return 0; + if (io_alloc_async_data(req)) + return -ENOMEM; + + io = req->async_data; + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); + return ret; +} + +static int io_setup_async_addr(struct io_kiocb *req, + struct sockaddr_storage *addr, + unsigned int issue_flags) +{ + struct io_async_msghdr *io; + + if (!addr || req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) + return -ENOMEM; + io = req->async_data; + memcpy(&io->addr, addr, sizeof(io->addr)); + return -EAGAIN; +} + int io_sendmsg_prep_async(struct io_kiocb *req) { int ret; @@ -944,7 +975,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { - struct sockaddr_storage address; + struct sockaddr_storage __address, *addr = NULL; struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_notif_slot *notif_slot; @@ -978,10 +1009,17 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_namelen = 0; if (zc->addr) { - ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); - if (unlikely(ret < 0)) - return ret; - msg.msg_name = (struct sockaddr *)&address; + if (req_has_async_data(req)) { + struct io_async_msghdr *io = req->async_data; + + msg.msg_name = addr = &io->addr; + } else { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&__address; + addr = &__address; + } msg.msg_namelen = zc->addr_len; } @@ -1013,13 +1051,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); + if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { zc->len -= ret; zc->buf += ret; zc->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; diff --git a/io_uring/net.h b/io_uring/net.h index 7c438d39c089..f91f56c6eeac 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -31,6 +31,7 @@ struct io_async_connect { int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_shutdown(struct io_kiocb *req, unsigned int issue_flags); +int io_sendzc_prep_async(struct io_kiocb *req); int io_sendmsg_prep_async(struct io_kiocb *req); void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req); int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 72dd2b2d8a9d..41410126c1c6 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -478,13 +478,15 @@ const struct io_op_def io_op_defs[] = { .pollout = 1, .audit_skip = 1, .ioprio = 1, + .manual_alloc = 1, #if defined(CONFIG_NET) + .async_size = sizeof(struct io_async_msghdr), .prep = io_sendzc_prep, .issue = io_sendzc, + .prep_async = io_sendzc_prep_async, #else .prep = io_eopnotsupp_prep, #endif - }, }; -- cgit v1.2.3 From c93c296fff6b369a7115916145047c8a3db6e27f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 24 Aug 2022 17:13:26 +0200 Subject: x86/sev: Mark snp_abort() noreturn Mark both the function prototype and definition as noreturn in order to prevent the compiler from doing transformations which confuse objtool like so: vmlinux.o: warning: objtool: sme_enable+0x71: unreachable instruction This triggers with gcc-12. Add it and sev_es_terminate() to the objtool noreturn tracking array too. Sort it while at it. Suggested-by: Michael Matz Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20220824152420.20547-1-bp@alien8.de --- arch/x86/include/asm/sev.h | 2 +- arch/x86/kernel/sev.c | 2 +- tools/objtool/check.c | 34 ++++++++++++++++++---------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 4a23e52fe0ee..ebc271bb6d8e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void snp_abort(void); +void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4f84c3f11af5..a428c62330d3 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2112,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init snp_abort(void) +void __init __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0cec74da7ffe..ad51689dfb41 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -162,32 +162,34 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, /* * Unfortunately these have to be hard coded because the noreturn - * attribute isn't provided in ELF data. + * attribute isn't provided in ELF data. Keep 'em sorted. */ static const char * const global_noreturns[] = { + "__invalid_creds", + "__module_put_and_kthread_exit", + "__reiserfs_panic", "__stack_chk_fail", - "panic", + "__ubsan_handle_builtin_unreachable", + "cpu_bringup_and_idle", + "cpu_startup_entry", "do_exit", + "do_group_exit", "do_task_dead", - "kthread_exit", - "make_task_dead", - "__module_put_and_kthread_exit", + "ex_handler_msr_mce", + "fortify_panic", "kthread_complete_and_exit", - "__reiserfs_panic", + "kthread_exit", + "kunit_try_catch_throw", "lbug_with_loc", - "fortify_panic", - "usercopy_abort", "machine_real_restart", + "make_task_dead", + "panic", "rewind_stack_and_make_dead", - "kunit_try_catch_throw", - "xen_start_kernel", - "cpu_bringup_and_idle", - "do_group_exit", + "sev_es_terminate", + "snp_abort", "stop_this_cpu", - "__invalid_creds", - "cpu_startup_entry", - "__ubsan_handle_builtin_unreachable", - "ex_handler_msr_mce", + "usercopy_abort", + "xen_start_kernel", }; if (!func) -- cgit v1.2.3 From 00da0cb385d05a89226e150a102eb49d8abb0359 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Mon, 1 Aug 2022 11:15:30 +0200 Subject: Documentation/ABI: Mention retbleed vulnerability info file for sysfs While reporting for the AMD retbleed vulnerability was added in 6b80b59b3555 ("x86/bugs: Report AMD retbleed vulnerability") the new sysfs file was not mentioned so far in the ABI documentation for sysfs-devices-system-cpu. Fix that. Fixes: 6b80b59b3555 ("x86/bugs: Report AMD retbleed vulnerability") Signed-off-by: Salvatore Bonaccorso Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220801091529.325327-1-carnil@debian.org --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5bf61881f012..760c889b6cd1 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -523,6 +523,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/retbleed Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities -- cgit v1.2.3 From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 25 Aug 2022 16:19:18 +0200 Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op() The error exit of privcmd_ioctl_dm_op() is calling unlock_pages() potentially with pages being NULL, leading to a NULL dereference. Additionally lock_pages() doesn't check for pin_user_pages_fast() having been completely successful, resulting in potentially not locking all pages into memory. This could result in sporadic failures when using the related memory in user mode. Fix all of that by calling unlock_pages() always with the real number of pinned pages, which will be zero in case pages being NULL, and by checking the number of pages pinned by pin_user_pages_fast() matching the expected number of pages. Cc: Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP") Reported-by: Rustam Subkhankulov Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 3369734108af..e88e8f6f0a33 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -581,27 +581,30 @@ static int lock_pages( struct privcmd_dm_op_buf kbufs[], unsigned int num, struct page *pages[], unsigned int nr_pages, unsigned int *pinned) { - unsigned int i; + unsigned int i, off = 0; - for (i = 0; i < num; i++) { + for (i = 0; i < num; ) { unsigned int requested; int page_count; requested = DIV_ROUND_UP( offset_in_page(kbufs[i].uptr) + kbufs[i].size, - PAGE_SIZE); + PAGE_SIZE) - off; if (requested > nr_pages) return -ENOSPC; page_count = pin_user_pages_fast( - (unsigned long) kbufs[i].uptr, + (unsigned long)kbufs[i].uptr + off * PAGE_SIZE, requested, FOLL_WRITE, pages); - if (page_count < 0) - return page_count; + if (page_count <= 0) + return page_count ? : -EFAULT; *pinned += page_count; nr_pages -= page_count; pages += page_count; + + off = (requested == page_count) ? 0 : off + page_count; + i += !off; } return 0; @@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) } rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned); - if (rc < 0) { - nr_pages = pinned; + if (rc < 0) goto out; - } for (i = 0; i < kdata.num; i++) { set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); @@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) xen_preemptible_hcall_end(); out: - unlock_pages(pages, nr_pages); + unlock_pages(pages, pinned); kfree(xbufs); kfree(pages); kfree(kbufs); -- cgit v1.2.3 From ad3b0b99113783f697579c7b09285916019865ea Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:03:49 +0800 Subject: drm/amdgpu: add TX_POWER_CTRL_1 macro definitions for NBIO IP v7.7.0 Add the BIF0_PCIE_TX_POWER_CTRL_1 register offset and mask macro definitions for AMD_CG_SUPPORT_BIF_LS. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h | 2 ++ .../gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h index 2ed95790a600..cf8d60c4df1b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h @@ -15243,6 +15243,8 @@ #define regBIF0_PCIE_TX_TRACKING_ADDR_HI_BASE_IDX 5 #define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS 0x420186 #define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS_BASE_IDX 5 +#define regBIF0_PCIE_TX_POWER_CTRL_1 0x420187 +#define regBIF0_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 #define regBIF0_PCIE_TX_CTRL_4 0x42018b #define regBIF0_PCIE_TX_CTRL_4_BASE_IDX 5 #define regBIF0_PCIE_TX_STATUS 0x420194 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h index eb62a18fcc48..3d60c9e92548 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h @@ -85627,6 +85627,19 @@ #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_PORT_MASK 0x0000000EL #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_UNIT_ID_MASK 0x00007F00L #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_STATUS_VALID_MASK 0x00008000L +//BIF0_PCIE_TX_POWER_CTRL_1 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN__SHIFT 0x0 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN__SHIFT 0x1 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN__SHIFT 0x2 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN__SHIFT 0x3 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN__SHIFT 0x4 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN__SHIFT 0x5 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN_MASK 0x00000002L +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN_MASK 0x00000004L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN_MASK 0x00000010L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN_MASK 0x00000020L //BIF0_PCIE_TX_CTRL_4 #define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW__SHIFT 0x0 #define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW_MASK 0x0000000FL -- cgit v1.2.3 From 2037769f995e45d3a368fb74983954b3ed8da178 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:12:21 +0800 Subject: drm/amdgpu: add NBIO IP v7.7.0 Clock Gating support Add BIF Clock Gating MGCG and LS support for NBIO IP v7.7.0. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 78 ++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 01e8288d09a8..1dc95ef21da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -247,6 +247,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) } +static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (enable) { + data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data); +} + +static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (enable) + data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset, @@ -262,6 +337,9 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_7_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, }; -- cgit v1.2.3 From 16c01544e30a4b4cf5f3eaacf7a4c19a3622b597 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:50:46 +0800 Subject: drm/amdgpu: enable NBIO IP v7.7.0 Clock Gating Enable AMD_CG_SUPPORT_BIF_MGCG and AMD_CG_SUPPORT_BIF_LS support. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 1ff7fc7bb340..982c12964879 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -603,6 +603,8 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = @@ -702,6 +704,7 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): + case IP_VERSION(7, 7, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -709,10 +712,6 @@ static int soc21_common_set_clockgating_state(void *handle, adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; - case IP_VERSION(7, 7, 0): - adev->hdp.funcs->update_clock_gating(adev, - state == AMD_CG_STATE_GATE); - break; default: break; } -- cgit v1.2.3 From 4e3464badbeebb3528c457aefe91413f8a9070b6 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 22 Aug 2022 12:37:10 -0400 Subject: drm/amd/display: enable PCON support for dcn314 [Why] DCN314 supports PCON. [How] Explicitly enable it in dcn314 resources. Signed-off-by: Roman Li Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 85f32206a766..3a9e3870b3a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1750,6 +1750,7 @@ static bool dcn314_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; -- cgit v1.2.3 From 00047c3d967d7ef8adf8bac3c3579294a3bc0bb1 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 22 Aug 2022 13:30:44 +0800 Subject: drm/amdgpu: add sdma instance check for gfx11 CGCG For some ASICs, like GFX IP v11.0.1, only have one SDMA instance, so not need to configure SDMA1_RLC_CGCG_CTRL for this case. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f45db80810fa..e8db772e068c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5182,9 +5182,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } else { /* Program RLC_CGCG_CGLS_CTRL */ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); @@ -5213,9 +5216,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } } -- cgit v1.2.3 From da1acbb12b33cbc651d8a7e956d254f1acc5034f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 8 Aug 2022 10:41:26 +0800 Subject: drm/amd/pm: update SMU 13.0.0 driver_if header To fit the latest 78.53 PMFW. Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 31 +++++++++++++--------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 78620b0bd279..f745cd8f1ab7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -24,12 +24,8 @@ #ifndef SMU13_DRIVER_IF_V13_0_0_H #define SMU13_DRIVER_IF_V13_0_0_H -// *** IMPORTANT *** -// PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x23 - //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x1D +#define PPTABLE_VERSION 0x22 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -1193,8 +1189,17 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[61]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1259,7 +1264,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1272,11 +1278,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1375,8 +1377,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t TempPadding; uint8_t PcieRate ; uint8_t PcieWidth ; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 6fe2fe92ebd7..ac308e72241a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2E #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms -- cgit v1.2.3 From 894c9c540f8315007a4752320e2399bc2e0c46b7 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 19 Aug 2022 17:15:08 -0400 Subject: drm/amdgpu: Fix page table setup on Arcturus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When translate_further is enabled, page table depth needs to be updated. This was missing on Arcturus MMHUB init. This was causing address translations to fail for SDMA user-mode queues. Fixes: 352e683b72e7 ("drm/amdgpu: Enable translate_further to extend UTCL2 reach") Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 6e0145b2b408..445cb06b9d26 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned int num_level, block_size; uint32_t tmp; int i; + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->gmc.translate_further) + num_level -= 1; + else + block_size -= 9; + for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); @@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, @@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, -- cgit v1.2.3 From ee8086dbc1585d9f4020a19447388246a5cff5c8 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 24 Aug 2022 11:16:51 +0800 Subject: drm/amdkfd: Fix isa version for the GC 10.3.7 Correct the isa version for handling KFD test. Fixes: 7c4f4f197e0c ("drm/amdkfd: Add GC 10.3.6 and 10.3.7 KFD definitions") Signed-off-by: Prike Liang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 357298e69495..22c0929d410b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -382,12 +382,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v10_3_kfd2kgd; break; case IP_VERSION(10, 3, 6): - gfx_target_version = 100306; - if (!vf) - f2g = &gfx_v10_3_kfd2kgd; - break; case IP_VERSION(10, 3, 7): - gfx_target_version = 100307; + gfx_target_version = 100306; if (!vf) f2g = &gfx_v10_3_kfd2kgd; break; -- cgit v1.2.3 From 61251b2cffea8c1811bbd2dbef175b65f64aaa86 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Tue, 23 Aug 2022 15:34:10 +0800 Subject: drm/amdgpu: add MGCG perfmon setting for gfx11 Enable GFX11 MGCG perfmon setting. V2: set rlc to saft mode before setting. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/soc21.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e8db772e068c..f6b1bb40e503 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -131,6 +131,8 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, bool all_hub, uint8_t dst_sel); static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, + bool enable); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -1139,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .init_spm_golden = &gfx_v11_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 982c12964879..55284b24f113 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -494,6 +494,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev) { } +static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev); + else + amdgpu_gfx_rlc_exit_safe_mode(adev); + + if (adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + return 0; +} + static const struct amdgpu_asic_funcs soc21_asic_funcs = { .read_disabled_bios = &soc21_read_disabled_bios, @@ -513,6 +527,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, + .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; static int soc21_common_early_init(void *handle) -- cgit v1.2.3 From b8983d42524f10ac6bf35bbce6a7cc8e45f61e04 Mon Sep 17 00:00:00 2001 From: Qu Huang Date: Tue, 23 Aug 2022 14:44:06 +0800 Subject: drm/amdgpu: mmVM_L2_CNTL3 register not initialized correctly The mmVM_L2_CNTL3 register is not assigned an initial value Signed-off-by: Qu Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3f44a099c52a..3e51e773f92b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + tmp = mmVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, -- cgit v1.2.3 From 602684adb42a04858e23248b22d4931b7ef2ad7e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 24 Aug 2022 10:08:36 +0200 Subject: docs: Update version number from 5.x to 6.x in README.rst A quick 'grep "5\.x" . -R' on Documentation shows that README.rst, 2.Process.rst and applying-patches.rst all mention the version number "5.x" for kernel releases. As the next release will be version 6.0, updating the version number to 6.x in README.rst seems reasonable. The description in 2.Process.rst is just a description of recent kernel releases, it was last updated in the beginning of 2020, and can be revisited at any time on a regular basis, independent of changing the version number from 5 to 6. So, there is no need to update this document now when transitioning from 5.x to 6.x numbering. The document applying-patches.rst is probably obsolete for most users anyway, a reader will sufficiently well understand the steps, even it mentions version 5 rather than version 6. So, do not update that to a version 6.x numbering scheme. Update version number from 5.x to 6.x in README.rst only. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20220824080836.23087-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/README.rst | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index caa3c09a5c3f..9eb6b9042f75 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -1,9 +1,9 @@ .. _readme: -Linux kernel release 5.x +Linux kernel release 6.x ============================================= -These are the release notes for Linux version 5. Read them carefully, +These are the release notes for Linux version 6. Read them carefully, as they tell you what this is all about, explain how to install the kernel, and what to do if something goes wrong. @@ -63,7 +63,7 @@ Installing the kernel source directory where you have permissions (e.g. your home directory) and unpack it:: - xz -cd linux-5.x.tar.xz | tar xvf - + xz -cd linux-6.x.tar.xz | tar xvf - Replace "X" with the version number of the latest kernel. @@ -72,12 +72,12 @@ Installing the kernel source files. They should match the library, and not get messed up by whatever the kernel-du-jour happens to be. - - You can also upgrade between 5.x releases by patching. Patches are + - You can also upgrade between 6.x releases by patching. Patches are distributed in the xz format. To install by patching, get all the newer patch files, enter the top level directory of the kernel source - (linux-5.x) and execute:: + (linux-6.x) and execute:: - xz -cd ../patch-5.x.xz | patch -p1 + xz -cd ../patch-6.x.xz | patch -p1 Replace "x" for all versions bigger than the version "x" of your current source tree, **in_order**, and you should be ok. You may want to remove @@ -85,13 +85,13 @@ Installing the kernel source that there are no failed patches (some-file-name# or some-file-name.rej). If there are, either you or I have made a mistake. - Unlike patches for the 5.x kernels, patches for the 5.x.y kernels + Unlike patches for the 6.x kernels, patches for the 6.x.y kernels (also known as the -stable kernels) are not incremental but instead apply - directly to the base 5.x kernel. For example, if your base kernel is 5.0 - and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1 - and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and - want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is, - patch -R) **before** applying the 5.0.3 patch. You can read more on this in + directly to the base 6.x kernel. For example, if your base kernel is 6.0 + and you want to apply the 6.0.3 patch, you must not first apply the 6.0.1 + and 6.0.2 patches. Similarly, if you are running kernel version 6.0.2 and + want to jump to 6.0.3, you must first reverse the 6.0.2 patch (that is, + patch -R) **before** applying the 6.0.3 patch. You can read more on this in :ref:`Documentation/process/applying-patches.rst `. Alternatively, the script patch-kernel can be used to automate this @@ -114,7 +114,7 @@ Installing the kernel source Software requirements --------------------- - Compiling and running the 5.x kernels requires up-to-date + Compiling and running the 6.x kernels requires up-to-date versions of various software packages. Consult :ref:`Documentation/process/changes.rst ` for the minimum version numbers required and how to get updates for these packages. Beware that using @@ -132,12 +132,12 @@ Build directory for the kernel place for the output files (including .config). Example:: - kernel source code: /usr/src/linux-5.x + kernel source code: /usr/src/linux-6.x build directory: /home/name/build/kernel To configure and build the kernel, use:: - cd /usr/src/linux-5.x + cd /usr/src/linux-6.x make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install -- cgit v1.2.3 From 465d0eb0dc31ae26c05504668d3957db91e99799 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 23 Aug 2022 19:40:53 +0800 Subject: Docs/admin-guide/mm/damon/usage: fix the example code snip The workflow example code is not working since it got the file names wrong. So fix this. Fixes: b18402726bd1 ("Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface") Reviewed-by: SeongJae Park Signed-off-by: Kairui Song Link: https://lore.kernel.org/r/20220823114053.53305-1-ryncsn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/damon/usage.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index d52f572a9029..ca91ecc29078 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -50,10 +50,10 @@ For a short example, users can monitor the virtual address space of a given workload as below. :: # cd /sys/kernel/mm/damon/admin/ - # echo 1 > kdamonds/nr && echo 1 > kdamonds/0/contexts/nr + # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts # echo vaddr > kdamonds/0/contexts/0/operations - # echo 1 > kdamonds/0/contexts/0/targets/nr - # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid + # echo 1 > kdamonds/0/contexts/0/targets/nr_targets + # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid_target # echo on > kdamonds/0/state Files Hierarchy @@ -366,12 +366,12 @@ memory rate becomes larger than 60%, or lower than 30%". :: # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes # cd kdamonds/0/contexts/0/schemes/0 # # set the basic access pattern and the action - # echo 4096 > access_patterns/sz/min - # echo 8192 > access_patterns/sz/max - # echo 0 > access_patterns/nr_accesses/min - # echo 5 > access_patterns/nr_accesses/max - # echo 10 > access_patterns/age/min - # echo 20 > access_patterns/age/max + # echo 4096 > access_pattern/sz/min + # echo 8192 > access_pattern/sz/max + # echo 0 > access_pattern/nr_accesses/min + # echo 5 > access_pattern/nr_accesses/max + # echo 10 > access_pattern/age/min + # echo 20 > access_pattern/age/max # echo pageout > action # # set quotas # echo 10 > quotas/ms -- cgit v1.2.3 From 2fc31465c5373b5ca4edf2e5238558cb62902311 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Aug 2022 20:52:59 +0200 Subject: bpf: Do mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO Precision markers need to be propagated whenever we have an ARG_CONST_* style argument, as the verifier cannot consider imprecise scalars to be equivalent for the purposes of states_equal check when such arguments refine the return value (in this case, set mem_size for PTR_TO_MEM). The resultant mem_size for the R0 is derived from the constant value, and if the verifier incorrectly prunes states considering them equivalent where such arguments exist (by seeing that both registers have reg->precise as false in regsafe), we can end up with invalid programs passing the verifier which can do access beyond what should have been the correct mem_size in that explored state. To show a concrete example of the problem: 0000000000000000 : 0: r2 = *(u32 *)(r1 + 80) 1: r1 = *(u32 *)(r1 + 76) 2: r3 = r1 3: r3 += 4 4: if r3 > r2 goto +18 5: w2 = 0 6: *(u32 *)(r1 + 0) = r2 7: r1 = *(u32 *)(r1 + 0) 8: r2 = 1 9: if w1 == 0 goto +1 10: r2 = -1 0000000000000058 : 11: r1 = 0 ll 13: r3 = 0 14: call bpf_ringbuf_reserve 15: if r0 == 0 goto +7 16: r1 = r0 17: r1 += 16777215 18: w2 = 0 19: *(u8 *)(r1 + 0) = r2 20: r1 = r0 21: r2 = 0 22: call bpf_ringbuf_submit 00000000000000b8 : 23: w0 = 0 24: exit For the first case, the single line execution's exploration will prune the search at insn 14 for the branch insn 9's second leg as it will be verified first using r2 = -1 (UINT_MAX), while as w1 at insn 9 will always be 0 so at runtime we don't get error for being greater than UINT_MAX/4 from bpf_ringbuf_reserve. The verifier during regsafe just sees reg->precise as false for both r2 registers in both states, hence considers them equal for purposes of states_equal. If we propagated precise markers using the backtracking support, we would use the precise marking to then ensure that old r2 (UINT_MAX) was within the new r2 (1) and this would never be true, so the verification would rightfully fail. The end result is that the out of bounds access at instruction 19 would be permitted without this fix. Note that reg->precise is always set to true when user does not have CAP_BPF (or when subprog count is greater than 1 (i.e. use of any static or global functions)), hence this is only a problem when precision marks need to be explicitly propagated (i.e. privileged users with CAP_BPF). A simplified test case has been included in the next patch to prevent future regressions. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220823185300.406-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 096fdac70165..30c6eebce146 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6066,6 +6066,9 @@ skip_type_check: return -EACCES; } meta->mem_size = reg->var_off.value; + err = mark_chain_precision(env, regno); + if (err) + return err; break; case ARG_PTR_TO_INT: case ARG_PTR_TO_LONG: -- cgit v1.2.3 From 1800b2ac96d8bc4ccdddc2ea9e83ecaffd54d3f2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Aug 2022 20:55:00 +0200 Subject: selftests/bpf: Add regression test for pruning fix Add a test to ensure we do mark_chain_precision for the argument type ARG_CONST_ALLOC_SIZE_OR_ZERO. For other argument types, this was already done, but propagation for missing for this case. Without the fix, this test case loads successfully. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220823185500.467-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/precise.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 9e754423fa8b..6c03a7d805f9 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -192,3 +192,28 @@ .result = VERBOSE_ACCEPT, .retval = -1, }, +{ + "precise: mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, ingress_ifindex)), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_MOV64_IMM(BPF_REG_2, 0x1000), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 42), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "invalid access to memory, mem_size=1 off=42 size=8", + .result = REJECT, +}, -- cgit v1.2.3 From d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 25 Aug 2022 15:32:40 -0400 Subject: audit: move audit_return_fixup before the filters The success and return_code are needed by the filters. Move audit_return_fixup() before the filters. This was causing syscall auditing events to be missed. Link: https://github.com/linux-audit/audit-kernel/issues/138 Cc: stable@vger.kernel.org Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls") Signed-off-by: Richard Guy Briggs [PM: manual merge required] Signed-off-by: Paul Moore --- kernel/auditsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index dd8d9ab747c3..79a5da1bc5bb 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1940,6 +1940,7 @@ void __audit_uring_exit(int success, long code) goto out; } + audit_return_fixup(ctx, success, code); if (ctx->context == AUDIT_CTX_SYSCALL) { /* * NOTE: See the note in __audit_uring_entry() about the case @@ -1981,7 +1982,6 @@ void __audit_uring_exit(int success, long code) audit_filter_inodes(current, ctx); if (ctx->current_state != AUDIT_STATE_RECORD) goto out; - audit_return_fixup(ctx, success, code); audit_log_exit(); out: @@ -2065,13 +2065,13 @@ void __audit_syscall_exit(int success, long return_code) if (!list_empty(&context->killed_trees)) audit_kill_trees(context); + audit_return_fixup(context, success, return_code); /* run through both filters to ensure we set the filterkey properly */ audit_filter_syscall(current, context); audit_filter_inodes(current, context); if (context->current_state < AUDIT_STATE_RECORD) goto out; - audit_return_fixup(context, success, return_code); audit_log_exit(); out: -- cgit v1.2.3 From a657182a5c5150cdfacb6640aad1d2712571a409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 Aug 2022 23:26:47 +0200 Subject: bpf: Don't use tnum_range on array range checking for poke descriptors Hsin-Wei reported a KASAN splat triggered by their BPF runtime fuzzer which is based on a customized syzkaller: BUG: KASAN: slab-out-of-bounds in bpf_int_jit_compile+0x1257/0x13f0 Read of size 8 at addr ffff888004e90b58 by task syz-executor.0/1489 CPU: 1 PID: 1489 Comm: syz-executor.0 Not tainted 5.19.0 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x9c/0xc9 print_address_description.constprop.0+0x1f/0x1f0 ? bpf_int_jit_compile+0x1257/0x13f0 kasan_report.cold+0xeb/0x197 ? kvmalloc_node+0x170/0x200 ? bpf_int_jit_compile+0x1257/0x13f0 bpf_int_jit_compile+0x1257/0x13f0 ? arch_prepare_bpf_dispatcher+0xd0/0xd0 ? rcu_read_lock_sched_held+0x43/0x70 bpf_prog_select_runtime+0x3e8/0x640 ? bpf_obj_name_cpy+0x149/0x1b0 bpf_prog_load+0x102f/0x2220 ? __bpf_prog_put.constprop.0+0x220/0x220 ? find_held_lock+0x2c/0x110 ? __might_fault+0xd6/0x180 ? lock_downgrade+0x6e0/0x6e0 ? lock_is_held_type+0xa6/0x120 ? __might_fault+0x147/0x180 __sys_bpf+0x137b/0x6070 ? bpf_perf_link_attach+0x530/0x530 ? new_sync_read+0x600/0x600 ? __fget_files+0x255/0x450 ? lock_downgrade+0x6e0/0x6e0 ? fput+0x30/0x1a0 ? ksys_write+0x1a8/0x260 __x64_sys_bpf+0x7a/0xc0 ? syscall_enter_from_user_mode+0x21/0x70 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f917c4e2c2d The problem here is that a range of tnum_range(0, map->max_entries - 1) has limited ability to represent the concrete tight range with the tnum as the set of resulting states from value + mask can result in a superset of the actual intended range, and as such a tnum_in(range, reg->var_off) check may yield true when it shouldn't, for example tnum_range(0, 2) would result in 00XX -> v = 0000, m = 0011 such that the intended set of {0, 1, 2} is here represented by a less precise superset of {0, 1, 2, 3}. As the register is known const scalar, really just use the concrete reg->var_off.value for the upper index check. Fixes: d2e4c1e6c294 ("bpf: Constant map key tracking for prog array pokes") Reported-by: Hsin-Wei Hung Signed-off-by: Daniel Borkmann Cc: Shung-Hsi Yu Acked-by: John Fastabend Link: https://lore.kernel.org/r/984b37f9fdf7ac36831d2137415a4a915744c1b6.1661462653.git.daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 30c6eebce146..3eadb14e090b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7033,8 +7033,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; struct bpf_reg_state *regs = cur_regs(env), *reg; struct bpf_map *map = meta->map_ptr; - struct tnum range; - u64 val; + u64 val, max; int err; if (func_id != BPF_FUNC_tail_call) @@ -7044,10 +7043,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return -EINVAL; } - range = tnum_range(0, map->max_entries - 1); reg = ®s[BPF_REG_3]; + val = reg->var_off.value; + max = map->max_entries; - if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { + if (!(register_is_const(reg) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -7055,8 +7055,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, err = mark_chain_precision(env, BPF_REG_3); if (err) return err; - - val = reg->var_off.value; if (bpf_map_key_unseen(aux)) bpf_map_key_store(aux, val); else if (!bpf_map_key_poisoned(aux) && -- cgit v1.2.3 From b82a26d8633cc89367fac75beb3ec33061bea44a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Aug 2022 22:57:40 +0200 Subject: Bluetooth: hci_event: Fix vendor (unknown) opcode status handling Commit c8992cffbe74 ("Bluetooth: hci_event: Use of a function table to handle Command Complete") was (presumably) meant to only refactor things without any functional changes. But it does have one undesirable side-effect, before *status would always be set to skb->data[0] and it might be overridden by some of the opcode specific handling. While now it always set by the opcode specific handlers. This means that if the opcode is not known *status does not get set any more at all! This behavior change has broken bluetooth support for BCM4343A0 HCIs, the hci_bcm.c code tries to configure UART attached HCIs at a higher baudraute using vendor specific opcodes. The BCM4343A0 does not support this and this used to simply fail: [ 25.646442] Bluetooth: hci0: BCM: failed to write clock (-56) [ 25.646481] Bluetooth: hci0: Failed to set baudrate After which things would continue with the initial baudraute. But now that hci_cmd_complete_evt() no longer sets status for unknown opcodes *status is left at 0. This causes the hci_bcm.c code to think the baudraute has been changed on the HCI side and to also adjust the UART baudrate, after which communication with the HCI is broken, leading to: [ 28.579042] Bluetooth: hci0: command 0x0c03 tx timeout [ 36.961601] Bluetooth: hci0: BCM: Reset failed (-110) And non working bluetooth. Fix this by restoring the previous default "*status = skb->data[0]" handling for unknown opcodes. Fixes: c8992cffbe74 ("Bluetooth: hci_event: Use of a function table to handle Command Complete") Signed-off-by: Hans de Goede Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 485c814cf44a..73aa9ee9d21a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4179,6 +4179,17 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } } + if (i == ARRAY_SIZE(hci_cc_table)) { + /* Unknown opcode, assume byte 0 contains the status, so + * that e.g. __hci_cmd_sync() properly returns errors + * for vendor specific commands send by HCI drivers. + * If a vendor doesn't actually follow this convention we may + * need to introduce a vendor CC table in order to properly set + * the status. + */ + *status = skb->data[0]; + } + handle_cmd_cnt_and_timer(hdev, ev->ncmd); hci_req_cmd_complete(hdev, *opcode, *status, req_complete, -- cgit v1.2.3 From 1fd02d56dae35b08e4cba8e6bd2c2e7ccff68ecc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 11 Aug 2022 14:20:46 -0700 Subject: Bluetooth: hci_sync: Fix suspend performance regression This attempts to fix suspend performance when there is no connections by not updating the event mask. Fixes: ef61b6ea1544 ("Bluetooth: Always set event mask on suspend") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e6d804b82b67..5fe440cdc68d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5288,17 +5288,21 @@ int hci_suspend_sync(struct hci_dev *hdev) /* Prevent disconnects from causing scanning to be re-enabled */ hci_pause_scan_sync(hdev); - /* Soft disconnect everything (power off) */ - err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); - if (err) { - /* Set state to BT_RUNNING so resume doesn't notify */ - hdev->suspend_state = BT_RUNNING; - hci_resume_sync(hdev); - return err; - } + if (hci_conn_count(hdev)) { + /* Soft disconnect everything (power off) */ + err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); + if (err) { + /* Set state to BT_RUNNING so resume doesn't notify */ + hdev->suspend_state = BT_RUNNING; + hci_resume_sync(hdev); + return err; + } - /* Update event mask so only the allowed event can wakeup the host */ - hci_set_event_mask_sync(hdev); + /* Update event mask so only the allowed event can wakeup the + * host. + */ + hci_set_event_mask_sync(hdev); + } /* Only configure accept list if disconnect succeeded and wake * isn't being prevented. -- cgit v1.2.3 From b840304fb46cdf7012722f456bce06f151b3e81b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 12 Aug 2022 15:33:57 -0700 Subject: Bluetooth: L2CAP: Fix build errors in some archs This attempts to fix the follow errors: In function 'memcmp', inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9, inlined from 'l2cap_global_chan_by_psm' at net/bluetooth/l2cap_core.c:2003:15: ./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 44 | #define __underlying_memcmp __builtin_memcmp | ^ ./include/linux/fortify-string.h:420:16: note: in expansion of macro '__underlying_memcmp' 420 | return __underlying_memcmp(p, q, size); | ^~~~~~~~~~~~~~~~~~~ In function 'memcmp', inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9, inlined from 'l2cap_global_chan_by_psm' at net/bluetooth/l2cap_core.c:2004:15: ./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 44 | #define __underlying_memcmp __builtin_memcmp | ^ ./include/linux/fortify-string.h:420:16: note: in expansion of macro '__underlying_memcmp' 420 | return __underlying_memcmp(p, q, size); | ^~~~~~~~~~~~~~~~~~~ Fixes: 332f1795ca20 ("Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm regression") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index cbe0cae73434..2c9de67daadc 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1992,11 +1992,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - c = l2cap_chan_hold_unless_zero(c); - if (c) { - read_unlock(&chan_list_lock); - return c; - } + if (!l2cap_chan_hold_unless_zero(c)) + continue; + + read_unlock(&chan_list_lock); + return c; } /* Closest match */ -- cgit v1.2.3 From 23b72814da1a094b4c065e0bb598249f310c5577 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Aug 2022 16:14:32 -0700 Subject: Bluetooth: MGMT: Fix Get Device Flags Get Device Flags don't check if device does actually use an RPA in which case it shall only set HCI_CONN_FLAG_REMOTE_WAKEUP if LL Privacy is enabled. Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 71 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6e31023b84f5..566b6291b38e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4547,6 +4547,22 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); } +static u32 get_params_flags(struct hci_dev *hdev, + struct hci_conn_params *params) +{ + u32 flags = hdev->conn_flags; + + /* Devices using RPAs can only be programmed in the acceptlist if + * LL Privacy has been enable otherwise they cannot mark + * HCI_CONN_FLAG_REMOTE_WAKEUP. + */ + if ((flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && !use_ll_privacy(hdev) && + hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) + flags &= ~HCI_CONN_FLAG_REMOTE_WAKEUP; + + return flags; +} + static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -4578,10 +4594,10 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, } else { params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); - if (!params) goto done; + supported_flags = get_params_flags(hdev, params); current_flags = params->flags; } @@ -4649,38 +4665,35 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", &cp->addr.bdaddr, cp->addr.type); } - } else { - params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - if (params) { - /* Devices using RPAs can only be programmed in the - * acceptlist LL Privacy has been enable otherwise they - * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP. - */ - if ((current_flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && - !use_ll_privacy(hdev) && - hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - bt_dev_warn(hdev, - "Cannot set wakeable for RPA"); - goto unlock; - } - params->flags = current_flags; - status = MGMT_STATUS_SUCCESS; + goto unlock; + } - /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY - * has been set. - */ - if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) - hci_update_passive_scan(hdev); - } else { - bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", - &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - } + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + if (!params) { + bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", + &cp->addr.bdaddr, le_addr_type(cp->addr.type)); + goto unlock; + } + + supported_flags = get_params_flags(hdev, params); + + if ((supported_flags | current_flags) != supported_flags) { + bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", + current_flags, supported_flags); + goto unlock; } + params->flags = current_flags; + status = MGMT_STATUS_SUCCESS; + + /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY + * has been set. + */ + if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) + hci_update_passive_scan(hdev); + unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 3cfbc6ac22d62d0a06be9ce2996ba9fed75436cd Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 17 Aug 2022 20:14:36 +0900 Subject: Bluetooth: hci_sync: fix double mgmt_pending_free() in remove_adv_monitor() syzbot is reporting double kfree() at remove_adv_monitor() [1], for commit 7cf5c2978f23fdbb ("Bluetooth: hci_sync: Refactor remove Adv Monitor") forgot to remove duplicated mgmt_pending_remove() when merging "if (err) {" path and "if (!pending) {" path. Link: https://syzkaller.appspot.com/bug?extid=915a8416bf15895b8e07 [1] Reported-by: syzbot Fixes: 7cf5c2978f23fdbb ("Bluetooth: hci_sync: Refactor remove Adv Monitor") Signed-off-by: Tetsuo Handa Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 566b6291b38e..72e6595a71cc 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5067,7 +5067,6 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_FAILED; - mgmt_pending_remove(cmd); goto unlock; } -- cgit v1.2.3 From c572909376673d8c126ae2ee53ba754ff9327d98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 18 Aug 2022 14:31:42 -0700 Subject: Bluetooth: ISO: Fix not handling shutdown condition In order to properly handle shutdown syscall the code shall not assume that the how argument is always SHUT_RDWR resulting in SHUTDOWN_MASK as that would result in poll to immediately report EPOLLHUP instead of properly waiting for disconnect_cfm (Disconnect Complete) which is rather important for the likes of BAP as the CIG may need to be reprogrammed. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ced8ad4fed4f..613039ba5dbf 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1309,7 +1309,7 @@ static int iso_sock_shutdown(struct socket *sock, int how) struct sock *sk = sock->sk; int err = 0; - BT_DBG("sock %p, sk %p", sock, sk); + BT_DBG("sock %p, sk %p, how %d", sock, sk, how); if (!sk) return 0; @@ -1317,17 +1317,32 @@ static int iso_sock_shutdown(struct socket *sock, int how) sock_hold(sk); lock_sock(sk); - if (!sk->sk_shutdown) { - sk->sk_shutdown = SHUTDOWN_MASK; - iso_sock_clear_timer(sk); - __iso_sock_close(sk); - - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && - !(current->flags & PF_EXITING)) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); + switch (how) { + case SHUT_RD: + if (sk->sk_shutdown & RCV_SHUTDOWN) + goto unlock; + sk->sk_shutdown |= RCV_SHUTDOWN; + break; + case SHUT_WR: + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto unlock; + sk->sk_shutdown |= SEND_SHUTDOWN; + break; + case SHUT_RDWR: + if (sk->sk_shutdown & SHUTDOWN_MASK) + goto unlock; + sk->sk_shutdown |= SHUTDOWN_MASK; + break; } + iso_sock_clear_timer(sk); + __iso_sock_close(sk); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + +unlock: release_sock(sk); sock_put(sk); -- cgit v1.2.3 From f48735a9aaf8258f39918e13adf464ccd7dce33b Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Tue, 23 Aug 2022 12:39:22 +0800 Subject: Bluetooth: hci_event: Fix checking conn for le_conn_complete_evt To prevent multiple conn complete events, we shouldn't look up the conn with hci_lookup_le_connect, since it requires the state to be BT_CONNECT. By the time the duplicate event is processed, the state might have changed, so we end up processing the new event anyway. Change the lookup function to hci_conn_hash_lookup_ba. Fixes: d5ebaa7c5f6f6 ("Bluetooth: hci_event: Ignore multiple conn complete events") Signed-off-by: Archie Pusaka Reviewed-by: Sonny Sasaka Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 73aa9ee9d21a..6643c9c20fa4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5801,7 +5801,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - conn = hci_lookup_le_connect(hdev); + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); if (!conn) { /* In case of error status and there is no connection pending * just unlock as there is nothing to cleanup. -- cgit v1.2.3 From cb0d160f813b23df2f4fe7af3237a026c44ed1aa Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:02:07 +0200 Subject: Bluetooth: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hidp/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 5940744a8cd8..cc20e706c639 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -83,14 +83,14 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo ci->product = session->input->id.product; ci->version = session->input->id.version; if (session->input->name) - strlcpy(ci->name, session->input->name, 128); + strscpy(ci->name, session->input->name, 128); else - strlcpy(ci->name, "HID Boot Device", 128); + strscpy(ci->name, "HID Boot Device", 128); } else if (session->hid) { ci->vendor = session->hid->vendor; ci->product = session->hid->product; ci->version = session->hid->version; - strlcpy(ci->name, session->hid->name, 128); + strscpy(ci->name, session->hid->name, 128); } } -- cgit v1.2.3 From 2da8eb834b775a9d1acea6214d3e4a78ac841e6e Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Tue, 23 Aug 2022 10:28:08 -0700 Subject: Bluetooth: hci_sync: hold hdev->lock when cleanup hci_conn When disconnecting all devices, hci_conn_failed is used to cleanup hci_conn object when the hci_conn object cannot be aborted. The function hci_conn_failed requires the caller holds hdev->lock. Fixes: 9b3628d79b46f ("Bluetooth: hci_sync: Cleanup hci_conn if it cannot be aborted") Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5fe440cdc68d..187786454d98 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4773,9 +4773,11 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) /* Cleanup hci_conn object if it cannot be cancelled as it * likelly means the controller and host stack are out of sync. */ - if (err) + if (err) { + hci_dev_lock(hdev); hci_conn_failed(conn, err); - + hci_dev_unlock(hdev); + } return err; case BT_CONNECT2: return hci_reject_conn_sync(hdev, conn, reason); -- cgit v1.2.3 From 874b301985ef2f89b8b592ad255e03fb6fbfe605 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 25 Aug 2022 22:04:56 -0700 Subject: crypto: lib - remove unneeded selection of XOR_BLOCKS CRYPTO_LIB_CHACHA_GENERIC doesn't need to select XOR_BLOCKS. It perhaps was thought that it's needed for __crypto_xor, but that's not the case. Enabling XOR_BLOCKS is problematic because the XOR_BLOCKS code runs a benchmark when it is initialized. That causes a boot time regression on systems that didn't have it enabled before. Therefore, remove this unnecessary and problematic selection. Fixes: e56e18985596 ("lib/crypto: add prompts back to crypto libraries") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9ff549f63540..47816af9a9d7 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -33,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a -- cgit v1.2.3 From 5934d9a0383619c14df91af8fd76261dc3de2f5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Aug 2022 18:01:10 +0300 Subject: ALSA: control: Re-order bounds checking in get_ctl_id_hash() These two checks are in the reverse order so it might read one element beyond the end of the array. First check if the "i" is within bounds before using it. Fixes: 6ab55ec0a938 ("ALSA: control: Fix an out-of-bounds bug in get_ctl_id_hash()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YwjgNh/gkG1hH7po@kili Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index e8fc4c511e5f..a7271927d875 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -391,7 +391,7 @@ static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; - for (i = 0; id->name[i] && i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN; i++) + for (i = 0; i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN && id->name[i]; i++) h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; -- cgit v1.2.3 From 2a5840124009f133bd09fd855963551fb2cefe22 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Jul 2022 12:16:22 -0700 Subject: lsm,io_uring: add LSM hooks for the new uring_cmd file op io-uring cmd support was added through ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd"), this extended the struct file_operations to allow a new command which each subsystem can use to enable command passthrough. Add an LSM specific for the command passthrough which enables LSMs to inspect the command details. This was discussed long ago without no clear pointer for something conclusive, so this enables LSMs to at least reject this new file operation. [0] https://lkml.kernel.org/r/8adf55db-7bab-f59d-d612-ed906b948d19@schaufler-ca.com Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Luis Chamberlain Acked-by: Jens Axboe Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 5 +++++ io_uring/uring_cmd.c | 5 +++++ security/security.c | 4 ++++ 5 files changed, 18 insertions(+) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 806448173033..60fff133c0b1 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -407,4 +407,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #ifdef CONFIG_IO_URING LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) +LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84a0d7e02176..3aa6030302f5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1582,6 +1582,9 @@ * Check whether the current task is allowed to spawn a io_uring polling * thread (IORING_SETUP_SQPOLL). * + * @uring_cmd: + * Check whether the file_operations uring_cmd is allowed to run. + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/security.h b/include/linux/security.h index 1bc362cb413f..7bd0c490703d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2060,6 +2060,7 @@ static inline int security_perf_event_write(struct perf_event *event) #ifdef CONFIG_SECURITY extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); +extern int security_uring_cmd(struct io_uring_cmd *ioucmd); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2069,6 +2070,10 @@ static inline int security_uring_sqpoll(void) { return 0; } +static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8e0cc2d9205e..0f7ad956ddcb 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -88,6 +89,10 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (!req->file->f_op->uring_cmd) return -EOPNOTSUPP; + ret = security_uring_cmd(ioucmd); + if (ret) + return ret; + if (ctx->flags & IORING_SETUP_SQE128) issue_flags |= IO_URING_F_SQE128; if (ctx->flags & IORING_SETUP_CQE32) diff --git a/security/security.c b/security/security.c index 14d30fec8a00..4b95de24bc8d 100644 --- a/security/security.c +++ b/security/security.c @@ -2660,4 +2660,8 @@ int security_uring_sqpoll(void) { return call_int_hook(uring_sqpoll, 0); } +int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return call_int_hook(uring_cmd, 0, ioucmd); +} #endif /* CONFIG_IO_URING */ -- cgit v1.2.3 From f4d653dcaa4e4056e1630423e6a8ece4869b544f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 10 Aug 2022 15:55:36 -0400 Subject: selinux: implement the security_uring_cmd() LSM hook Add a SELinux access control for the iouring IORING_OP_URING_CMD command. This includes the addition of a new permission in the existing "io_uring" object class: "cmd". The subject of the new permission check is the domain of the process requesting access, the object is the open file which points to the device/file that is the target of the IORING_OP_URING_CMD operation. A sample policy rule is shown below: allow :io_uring { cmd }; Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Paul Moore --- security/selinux/hooks.c | 24 ++++++++++++++++++++++++ security/selinux/include/classmap.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 79573504783b..03bca97c8b29 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -91,6 +91,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -6987,6 +6988,28 @@ static int selinux_uring_sqpoll(void) return avc_has_perm(&selinux_state, sid, sid, SECCLASS_IO_URING, IO_URING__SQPOLL, NULL); } + +/** + * selinux_uring_cmd - check if IORING_OP_URING_CMD is allowed + * @ioucmd: the io_uring command structure + * + * Check to see if the current domain is allowed to execute an + * IORING_OP_URING_CMD against the device/file specified in @ioucmd. + * + */ +static int selinux_uring_cmd(struct io_uring_cmd *ioucmd) +{ + struct file *file = ioucmd->file; + struct inode *inode = file_inode(file); + struct inode_security_struct *isec = selinux_inode(inode); + struct common_audit_data ad; + + ad.type = LSM_AUDIT_DATA_FILE; + ad.u.file = file; + + return avc_has_perm(&selinux_state, current_sid(), isec->sid, + SECCLASS_IO_URING, IO_URING__CMD, &ad); +} #endif /* CONFIG_IO_URING */ /* @@ -7231,6 +7254,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, selinux_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, selinux_uring_sqpoll), + LSM_HOOK_INIT(uring_cmd, selinux_uring_cmd), #endif /* diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index ff757ae5f253..1c2f41ff4e55 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -253,7 +253,7 @@ const struct security_class_mapping secclass_map[] = { { "anon_inode", { COMMON_FILE_PERMS, NULL } }, { "io_uring", - { "override_creds", "sqpoll", NULL } }, + { "override_creds", "sqpoll", "cmd", NULL } }, { NULL } }; -- cgit v1.2.3 From 707527956d90ea4f304188555a97144183af1e49 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 19 Aug 2022 16:20:33 -0400 Subject: /dev/null: add IORING_OP_URING_CMD support This patch adds support for the io_uring command pass through, aka IORING_OP_URING_CMD, to the /dev/null driver. As with all of the /dev/null functionality, the implementation is just a simple sink where commands go to die, but it should be useful for developers who need a simple IORING_OP_URING_CMD test device that doesn't require any special hardware. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Acked-by: Jens Axboe Signed-off-by: Paul Moore --- drivers/char/mem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 84ca98ed1dad..32a932a065a6 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -480,6 +480,11 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out, return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); } +static int uring_cmd_null(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + return 0; +} + static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter) { size_t written = 0; @@ -663,6 +668,7 @@ static const struct file_operations null_fops = { .read_iter = read_iter_null, .write_iter = write_iter_null, .splice_write = splice_write_null, + .uring_cmd = uring_cmd_null, }; static const struct file_operations __maybe_unused port_fops = { -- cgit v1.2.3 From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 09:17:08 -0400 Subject: wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- Documentation/atomic_bitops.txt | 10 ++++------ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++++++ .../asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++++ include/asm-generic/bitops/non-atomic.h | 1 + .../bitops/non-instrumented-non-atomic.h | 1 + include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 10 files changed, 60 insertions(+), 12 deletions(-) diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index d8b101c97031..edea4656c5c0 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_set_bit_lock(), - if the bit in memory is unchanged by the operation then it is deemed to have - failed. + - RMW operations that are conditional are fully ordered. -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and -clear_bit_unlock() which has RELEASE semantics. +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has +ACQUIRE semantics. Since a platform only has a single means of achieving atomic operations the same barriers as for atomic_t are used, see atomic_t.txt. diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 973c6bd17f98..0fe9de58af31 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 3d5ebd24652b..564a8c675d85 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include +#include #ifndef _LINUX_BITOPS_H #error only can be included directly @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time. @@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit +#define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index 988a3bbfba34..2b238b161a62 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volatile unsigned long *addr) return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h index 5c37ced343ae..71f8d54a5195 100644 --- a/include/asm-generic/bitops/non-atomic.h +++ b/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h index bdb9b1ffaee9..0ddc78dfc358 100644 --- a/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -12,5 +12,6 @@ #define ___test_and_change_bit arch___test_and_change_bit #define _test_bit arch_test_bit +#define _test_bit_acquire arch_test_bit_acquire #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf9bf65039f2..3b89c64bcfd8 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index def8b8d30ccc..089c9ade4325 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7dec36aecbd9..7725b7579b78 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index d4788f810b55..0b1cd985dc27 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); -- cgit v1.2.3 From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 19 Aug 2022 03:13:36 +0900 Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed fbcon_do_set_font() calls vc_resize() when font size is changed. However, if if vc_resize() failed, current implementation doesn't revert changes for font size, and this causes inconsistent state. syzbot reported unable to handle page fault due to this issue [1]. syzbot's repro uses fault injection which cause failure for memory allocation, so vc_resize() failed. This patch fixes this issue by properly revert changes for font related date when vc_resize() failed. Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1] Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Helge Deller CC: stable@vger.kernel.org # 5.15+ --- drivers/video/fbdev/core/fbcon.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 4a032fcf0d14..098b62f7b701 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2401,15 +2401,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fb_info *info = fbcon_info_from_console(vc->vc_num); struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; - int resize; + int resize, ret, old_userfont, old_width, old_height, old_charcount; char *old_data = NULL; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); if (p->userfont) old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); + old_userfont = p->userfont; if ((p->userfont = userfont)) REFCOUNT(data)++; + + old_width = vc->vc_font.width; + old_height = vc->vc_font.height; + old_charcount = vc->vc_font.charcount; + vc->vc_font.width = w; vc->vc_font.height = h; vc->vc_font.charcount = charcount; @@ -2425,7 +2431,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); cols /= w; rows /= h; - vc_resize(vc, cols, rows); + ret = vc_resize(vc, cols, rows); + if (ret) + goto err_out; } else if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { fbcon_clear_margins(vc, 0); @@ -2435,6 +2443,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, if (old_data && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; + +err_out: + p->fontdata = old_data; + vc->vc_font.data = (void *)old_data; + + if (userfont) { + p->userfont = old_userfont; + REFCOUNT(data)--; + } + + vc->vc_font.width = old_width; + vc->vc_font.height = old_height; + vc->vc_font.charcount = old_charcount; + + return ret; } /* -- cgit v1.2.3 From dd9373402280cf4715fdc8fd5070f7d039e43511 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 23 Aug 2022 16:46:18 -0700 Subject: Smack: Provide read control for io_uring_cmd Limit io_uring "cmd" options to files for which the caller has Smack read access. There may be cases where the cmd option may be closer to a write access than a read, but there is no way to make that determination. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Casey Schaufler Signed-off-by: Paul Moore --- security/smack/smack_lsm.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 001831458fa2..bffccdc494cb 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "smack.h" #define TRANS_TRUE "TRUE" @@ -4732,6 +4733,36 @@ static int smack_uring_sqpoll(void) return -EPERM; } +/** + * smack_uring_cmd - check on file operations for io_uring + * @ioucmd: the command in question + * + * Make a best guess about whether a io_uring "command" should + * be allowed. Use the same logic used for determining if the + * file could be opened for read in the absence of better criteria. + */ +static int smack_uring_cmd(struct io_uring_cmd *ioucmd) +{ + struct file *file = ioucmd->file; + struct smk_audit_info ad; + struct task_smack *tsp; + struct inode *inode; + int rc; + + if (!file) + return -EINVAL; + + tsp = smack_cred(file->f_cred); + inode = file_inode(file); + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad); + rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc); + + return rc; +} + #endif /* CONFIG_IO_URING */ struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { @@ -4889,6 +4920,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, smack_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, smack_uring_sqpoll), + LSM_HOOK_INIT(uring_cmd, smack_uring_cmd), #endif }; -- cgit v1.2.3 From 11745ecfe8fea4b4a4c322967a7605d2ecbd5080 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 3 Aug 2022 09:00:31 -0700 Subject: perf/x86/intel/uncore: Fix broken read_counter() for SNB IMC PMU Existing code was generating bogus counts for the SNB IMC bandwidth counters: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ 1.000327813 1,024.03 MiB uncore_imc/data_reads/ 1.000327813 20.73 MiB uncore_imc/data_writes/ 2.000580153 261,120.00 MiB uncore_imc/data_reads/ 2.000580153 23.28 MiB uncore_imc/data_writes/ The problem was introduced by commit: 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC") Where the read_counter callback was replace to point to the generic uncore_mmio_read_counter() function. The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in MMIO. But uncore_mmio_read_counter() is using a readq() call to read from MMIO therefore reading 64-bit from MMIO. Although this is okay for the uncore_perf_event_update() function because it is shifting the value based on the actual counter width to compute a delta, it is not okay for the uncore_pmu_event_start() which is simply reading the counter and therefore priming the event->prev_count with a bogus value which is responsible for causing bogus deltas in the perf stat command above. The fix is to reintroduce the custom callback for read_counter for the SNB IMC PMU and use readl() instead of readq(). With the change the output of perf stat is back to normal: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ 1.000120987 296.94 MiB uncore_imc/data_reads/ 1.000120987 138.42 MiB uncore_imc/data_writes/ 2.000403144 175.91 MiB uncore_imc/data_reads/ 2.000403144 68.50 MiB uncore_imc/data_writes/ Fixes: 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC") Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20220803160031.1379788-1-eranian@google.com --- arch/x86/events/intel/uncore_snb.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index ce440011cc4e..1ef4f7861e2e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid) return 0; } +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, @@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = uncore_mmio_read_counter, + .read_counter = snb_uncore_imc_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { -- cgit v1.2.3 From 5479d6d4bf122d4b137659559a7bd17784b97b7e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 27 Aug 2022 00:01:50 +0800 Subject: docs/conf.py: add function attribute '__fix_address' to conf.py Stephen Rothwell reported htmldocs warning when merging net-next: Documentation/networking/kapi:26: net/core/skbuff.c:780: WARNING: Error in declarator or parameters Invalid C declaration: Expecting "(" in parameters. [error at 19] void __fix_address kfree_skb_reason (struct sk_buff *skb, enum skb_drop_reason reason) -------------------^ Add __fix_address keyword to c_id_attributes array in conf.py to fix the warning. Link: https://lore.kernel.org/linux-next/20220825154105.534d78ab@canb.auug.org.au/ Reported-by: Stephen Rothwell Signed-off-by: Menglong Dong Tested-by: Bagas Sanjaya Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index 934727e23e0e..255384d094bf 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -86,6 +86,7 @@ if major >= 3: "__used", "__weak", "noinline", + "__fix_address", # include/linux/memblock.h: "__init_memblock", -- cgit v1.2.3 From 7498a457ecf7ff2c4d379360aa8f24566bb1543e Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Thu, 25 Aug 2022 10:49:55 +0200 Subject: net: sparx5: fix handling uneven length packets in manual extraction Packets that are not of length divisible by 4 (e.g. 77, 78, 79) would have the checksum included up to next multiple of 4 (a 77 bytes packet would have 3 bytes of ethernet checksum included). The check for the value expects it in host (Little) endian. Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Casper Andersson Reviewed-by: Steen Hegelund Link: https://lore.kernel.org/r/20220825084955.684637-1-casper.casan@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 304f84aadc36..21844beba72d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -113,6 +113,8 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) /* This assumes STATUS_WORD_POS == 1, Status * just after last data */ + if (!byte_swap) + val = ntohl((__force __be32)val); byte_cnt -= (4 - XTR_VALID_BYTES(val)); eof_flag = true; break; -- cgit v1.2.3 From 2ca1c94ce0b65a2ce7512b718f3d8a0fe6224bca Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 26 Aug 2022 08:25:30 +0800 Subject: tg3: Disable tg3 device on system reboot to avoid triggering AER Commit d60cd06331a3 ("PM: ACPI: reboot: Use S5 for reboot") caused a reboot hang on one Dell servers so the commit was reverted. Someone managed to collect the AER log and it's caused by MSI: [ 148.762067] ACPI: Preparing to enter system sleep state S5 [ 148.794638] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 5 [ 148.803731] {1}[Hardware Error]: event severity: recoverable [ 148.810191] {1}[Hardware Error]: Error 0, type: fatal [ 148.816088] {1}[Hardware Error]: section_type: PCIe error [ 148.822391] {1}[Hardware Error]: port_type: 0, PCIe end point [ 148.829026] {1}[Hardware Error]: version: 3.0 [ 148.834266] {1}[Hardware Error]: command: 0x0006, status: 0x0010 [ 148.841140] {1}[Hardware Error]: device_id: 0000:04:00.0 [ 148.847309] {1}[Hardware Error]: slot: 0 [ 148.852077] {1}[Hardware Error]: secondary_bus: 0x00 [ 148.857876] {1}[Hardware Error]: vendor_id: 0x14e4, device_id: 0x165f [ 148.865145] {1}[Hardware Error]: class_code: 020000 [ 148.870845] {1}[Hardware Error]: aer_uncor_status: 0x00100000, aer_uncor_mask: 0x00010000 [ 148.879842] {1}[Hardware Error]: aer_uncor_severity: 0x000ef030 [ 148.886575] {1}[Hardware Error]: TLP Header: 40000001 0000030f 90028090 00000000 [ 148.894823] tg3 0000:04:00.0: AER: aer_status: 0x00100000, aer_mask: 0x00010000 [ 148.902795] tg3 0000:04:00.0: AER: [20] UnsupReq (First) [ 148.910234] tg3 0000:04:00.0: AER: aer_layer=Transaction Layer, aer_agent=Requester ID [ 148.918806] tg3 0000:04:00.0: AER: aer_uncor_severity: 0x000ef030 [ 148.925558] tg3 0000:04:00.0: AER: TLP Header: 40000001 0000030f 90028090 00000000 The MSI is probably raised by incoming packets, so power down the device and disable bus mastering to stop the traffic, as user confirmed this approach works. In addition to that, be extra safe and cancel reset task if it's running. Cc: Josef Bacik Link: https://lore.kernel.org/all/b8db79e6857c41dab4ef08bdf826ea7c47e3bafc.1615947283.git.josef@toxicpanda.com/ BugLink: https://bugs.launchpad.net/bugs/1917471 Signed-off-by: Kai-Heng Feng Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20220826002530.1153296-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index db1e9d810b41..89889d8150da 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18076,16 +18076,20 @@ static void tg3_shutdown(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct tg3 *tp = netdev_priv(dev); + tg3_reset_task_cancel(tp); + rtnl_lock(); + netif_device_detach(dev); if (netif_running(dev)) dev_close(dev); - if (system_state == SYSTEM_POWER_OFF) - tg3_power_down(tp); + tg3_power_down(tp); rtnl_unlock(); + + pci_disable_device(pdev); } /** -- cgit v1.2.3 From 3ce9f2bef75528936c78a7053301f5725f622f3a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Aug 2022 19:39:51 -0700 Subject: net: smsc911x: Stop and start PHY during suspend and resume Commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") unveiled that the smsc911x driver was not properly stopping and restarting the PHY during suspend/resume. Correct that by indicating that the MAC is in charge of PHY PM operations and ensure that all MDIO bus activity is quiescent during suspend. Tested-by: Geert Uytterhoeven Tested-by: Marek Szyprowski Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Fixes: 2aa70f864955 ("net: smsc911x: Quieten netif during suspend") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220825023951.3220-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smsc911x.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 3bf20211cceb..3829c2805b16 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1037,6 +1037,8 @@ static int smsc911x_mii_probe(struct net_device *dev) return ret; } + /* Indicate that the MAC is responsible for managing PHY PM */ + phydev->mac_managed_pm = true; phy_attached_info(phydev); phy_set_max_speed(phydev, SPEED_100); @@ -2587,6 +2589,8 @@ static int smsc911x_suspend(struct device *dev) if (netif_running(ndev)) { netif_stop_queue(ndev); netif_device_detach(ndev); + if (!device_may_wakeup(dev)) + phy_stop(ndev->phydev); } /* enable wake on LAN, energy detection and the external PME @@ -2628,6 +2632,8 @@ static int smsc911x_resume(struct device *dev) if (netif_running(ndev)) { netif_device_attach(ndev); netif_start_queue(ndev); + if (!device_may_wakeup(dev)) + phy_start(ndev->phydev); } return 0; -- cgit v1.2.3 From a87406f4adee9c53b311d8a1ba2849c69e29a6d0 Mon Sep 17 00:00:00 2001 From: Andrey Zhadchenko Date: Thu, 25 Aug 2022 05:03:26 +0300 Subject: openvswitch: fix memory leak at failed datapath creation ovs_dp_cmd_new()->ovs_dp_change()->ovs_dp_set_upcall_portids() allocates array via kmalloc. If for some reason new_vport() fails during ovs_dp_cmd_new() dp->upcall_portids must be freed. Add missing kfree. Kmemleak example: unreferenced object 0xffff88800c382500 (size 64): comm "dump_state", pid 323, jiffies 4294955418 (age 104.347s) hex dump (first 32 bytes): 5e c2 79 e4 1f 7a 38 c7 09 21 38 0c 80 88 ff ff ^.y..z8..!8..... 03 00 00 00 0a 00 00 00 14 00 00 00 28 00 00 00 ............(... backtrace: [<0000000071bebc9f>] ovs_dp_set_upcall_portids+0x38/0xa0 [<000000000187d8bd>] ovs_dp_change+0x63/0xe0 [<000000002397e446>] ovs_dp_cmd_new+0x1f0/0x380 [<00000000aa06f36e>] genl_family_rcv_msg_doit+0xea/0x150 [<000000008f583bc4>] genl_rcv_msg+0xdc/0x1e0 [<00000000fa10e377>] netlink_rcv_skb+0x50/0x100 [<000000004959cece>] genl_rcv+0x24/0x40 [<000000004699ac7f>] netlink_unicast+0x23e/0x360 [<00000000c153573e>] netlink_sendmsg+0x24e/0x4b0 [<000000006f4aa380>] sock_sendmsg+0x62/0x70 [<00000000d0068654>] ____sys_sendmsg+0x230/0x270 [<0000000012dacf7d>] ___sys_sendmsg+0x88/0xd0 [<0000000011776020>] __sys_sendmsg+0x59/0xa0 [<000000002e8f2dc1>] do_syscall_64+0x3b/0x90 [<000000003243e7cb>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: b83d23a2a38b ("openvswitch: Introduce per-cpu upcall dispatch") Acked-by: Aaron Conole Signed-off-by: Andrey Zhadchenko Link: https://lore.kernel.org/r/20220825020326.664073-1-andrey.zhadchenko@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7e8a39a35627..6c9d153afbee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1802,7 +1802,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - goto err_unlock_and_destroy_meters; + goto err_destroy_portids; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1817,6 +1817,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; +err_destroy_portids: + kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: ovs_unlock(); ovs_meters_exit(dp); -- cgit v1.2.3 From ebe5555c2f34505cdb1ae5c3de8b24e33740b3e0 Mon Sep 17 00:00:00 2001 From: Tianyu Yuan Date: Thu, 25 Aug 2022 10:08:45 +0200 Subject: nfp: flower: fix ingress police using matchall filter Referenced commit introduced nfp_policer_validate in the progress installing rate limiter. This validate check the action id and will reject police with CONTINUE, which is required to support ingress police offload. Fix this issue by allowing FLOW_ACTION_CONTINUE as notexceed action id in nfp_policer_validate Fixes: d97b4b105ce7 ("flow_offload: reject offload for all drivers with invalid police parameters") Signed-off-by: Tianyu Yuan Reviewed-by: Baowen Zheng Reviewed-by: Louis Peens Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20220825080845.507534-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/flower/qos_conf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 4e5df9f2c372..7b92026e1a6f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -127,10 +127,11 @@ static int nfp_policer_validate(const struct flow_action *action, return -EOPNOTSUPP; } - if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE && + act->police.notexceed.act_id != FLOW_ACTION_PIPE && act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { NL_SET_ERR_MSG_MOD(extack, - "Offload not supported when conform action is not pipe or ok"); + "Offload not supported when conform action is not continue, pipe or ok"); return -EOPNOTSUPP; } -- cgit v1.2.3 From bc9e7fe313d5e56d4d5f34bcc04d1165f94f86fb Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Jul 2022 10:39:46 +0100 Subject: perf python: Fix build when PYTHON_CONFIG is user supplied The previous change to Python autodetection had a small mistake where the auto value was used to determine the Python binary, rather than the user supplied value. The Python binary is only used for one part of the build process, rather than the final linking, so it was producing correct builds in most scenarios, especially when the auto detected value matched what the user wanted, or the system only had a valid set of Pythons. Change it so that the Python binary path is derived from either the PYTHON_CONFIG value or PYTHON value, depending on what is specified by the user. This was the original intention. This error was spotted in a build failure an odd cross compilation environment after commit 4c41cb46a732fe82 ("perf python: Prefer python3") was merged. Fixes: 630af16eee495f58 ("perf tools: Use Python devtools for version autodetection rather than runtime") Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220728093946.1337642-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0661a1cf9855..2171f02daf59 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -265,7 +265,7 @@ endif # defined. get-executable-or-default fails with an error if the first argument is supplied but # doesn't exist. override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO)) -override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO))) +override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG))) grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -- cgit v1.2.3 From dbcfe5ec3f9a5799d8b49ad2c81549bbfa8390e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:53:44 -0300 Subject: tools kvm headers arm64: Update KVM header from the kernel sources To pick the changes from: ae3b1da95413614f ("KVM: arm64: Fix compile error due to sign extension") That doesn't result in any changes in tooling (when built on x86), only addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Marc Zyngier Cc: Namhyung Kim Cc: Yang Yingliang Link: https://lore.kernel.org/all/YwOMCCc4E79FuvDe@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874..316917b98707 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -75,9 +75,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 -- cgit v1.2.3 From bf515f024e4c0ca46a1b08c4f31860c01781d8a5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 22 Aug 2022 14:33:51 -0700 Subject: perf stat: Clear evsel->reset_group for each stat run If a weak group is broken then the reset_group flag remains set for the next run. Having reset_group set means the counter isn't created and ultimately a segfault. A simple reproduction of this is: # perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W which will be added as a test in the next patch. Fixes: 4804e0111662d7d8 ("perf stat: Use affinity for opening events") Reviewed-by: Andi Kleen Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Xing Zhengjun Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220822213352.75721-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7fb81a44672d..54cd29d07ca8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } evlist__for_each_entry(evsel_list, counter) { + counter->reset_group = false; if (bpf_counter__load(counter, &target)) return -1; if (!evsel__is_bpf(counter)) -- cgit v1.2.3 From 0c361c6eaba7fe1a29391540dd8797e850e49f21 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 22 Aug 2022 14:33:52 -0700 Subject: perf test: Stat test for repeat with a weak group Breaking a weak group requires multiple passes of an evlist, with multiple runs this can introduce bugs ultimately leading to segfaults. Add a test to cover this. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220822213352.75721-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 9313ef2739e0..26a51b48aee4 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -28,6 +28,24 @@ test_stat_record_report() { echo "stat record and report test [Success]" } +test_stat_repeat_weak_groups() { + echo "stat repeat weak groups test" + if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \ + true 2>&1 | grep -q 'seconds time elapsed' + then + echo "stat repeat weak groups test [Skipped event parsing failed]" + return + fi + if ! perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W' \ + true > /dev/null 2>&1 + then + echo "stat repeat weak groups test [Failed]" + err=1 + return + fi + echo "stat repeat weak groups test [Success]" +} + test_topdown_groups() { # Topdown events must be grouped with the slots event first. Test that # parse-events reorders this. @@ -75,6 +93,7 @@ test_topdown_weak_groups() { test_default_stat test_stat_record_report +test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups exit $err -- cgit v1.2.3 From e89eaa611c7568d1288a2ccca88355a9434f2d47 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Aug 2022 03:01:27 -0700 Subject: perf record: Fix manpage formatting of description of support to hybrid systems The Intel hybrid description is written in a different style than the rest of the perf record man page. There were some new command line options added after it which resulted in very strange section ordering. Move the hybrid include last. Also the sub sections in the hybrid document don't fit the record manpage well (especially since it talks about all kinds of unrelated commands). I left this for now, but would be better to separate this properly in the different man pages. It would be better to use sub sections for the other sections, but these don't seem to be supported in AsciiDoc? Some of the examples are still misrendered in the manpage with an indented troff command, but I don't know how to fix that. In any case it's now better than before. Signed-off-by: Andi Kleen Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220818100127.249401-1-ak@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-hybrid.txt | 10 ---------- tools/perf/Documentation/perf-record.txt | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt index c9302096dc46..e7a776ad25d7 100644 --- a/tools/perf/Documentation/intel-hybrid.txt +++ b/tools/perf/Documentation/intel-hybrid.txt @@ -21,11 +21,6 @@ cat /sys/devices/cpu_atom/cpus It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus. -Quickstart - -List hybrid event ------------------ - As before, use perf-list to list the symbolic event. perf list @@ -40,7 +35,6 @@ the event is belong to. Same event name but with different pmu can be supported. Enable hybrid event with a specific pmu ---------------------------------------- To enable a core only event or atom only event, following syntax is supported: @@ -53,7 +47,6 @@ For example, count the 'cycles' event on core cpus. perf stat -e cpu_core/cycles/ Create two events for one hardware event automatically ------------------------------------------------------- When creating one event and the event is available on both atom and core, two events are created automatically. One is for atom, the other is for @@ -132,7 +125,6 @@ For perf-stat result, it displays two events: The first 'cycles' is core event, the second 'cycles' is atom event. Thread mode example: --------------------- perf-stat reports the scaled counts for hybrid event and with a percentage displayed. The percentage is the event's running time/enabling time. @@ -176,14 +168,12 @@ perf_event_attr: 604,097,080 cpu_atom/cycles/ (99.57%) perf-record: ------------- If there is no '-e' specified in perf record, on hybrid platform, it creates two default 'cycles' and adds them to event list. One is for core, the other is for atom. perf-stat: ----------- If there is no '-e' specified in perf stat, on hybrid platform, besides of software events, following events are created and diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 099817ef5150..6ec6d0ba0a72 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -757,8 +757,6 @@ events in data directory files. Option specified with no or empty value defaults to CPU layout. Masks defined or provided by the option value are filtered through the mask provided by -C option. -include::intel-hybrid.txt[] - --debuginfod[=URLs]:: Specify debuginfod URL to be used when cacheing perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: @@ -778,6 +776,8 @@ include::intel-hybrid.txt[] only, as of now. So the applications built without the frame pointer might see bogus addresses. +include::intel-hybrid.txt[] + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] -- cgit v1.2.3 From d72e5cf3cf69d4c68d3b54aea232451b0a8b69d3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 24 Aug 2022 07:57:33 -0700 Subject: perf sched: Fix memory leaks in __cmd_record detected with -fsanitize=address An array of strings is passed to cmd_record but not freed. As cmd_record modifies the array, add another array as a copy that can be mutated allowing the original array contents to all be freed. Detected with -fsanitize=address. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220824145733.409005-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 2f6cd1b8b662..a5cf243c337f 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3355,7 +3355,8 @@ static bool schedstat_events_exposed(void) static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; - const char **rec_argv; + char **rec_argv; + const char **rec_argv_copy; const char * const record_args[] = { "record", "-a", @@ -3384,6 +3385,7 @@ static int __cmd_record(int argc, const char **argv) ARRAY_SIZE(schedstat_args) : 0; struct tep_event *waking_event; + int ret; /* * +2 for either "-e", "sched:sched_wakeup" or @@ -3391,14 +3393,18 @@ static int __cmd_record(int argc, const char **argv) */ rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); - if (rec_argv == NULL) return -ENOMEM; + rec_argv_copy = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv_copy == NULL) { + free(rec_argv); + return -ENOMEM; + } for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); - rec_argv[i++] = "-e"; + rec_argv[i++] = strdup("-e"); waking_event = trace_event__tp_format("sched", "sched_waking"); if (!IS_ERR(waking_event)) rec_argv[i++] = strdup("sched:sched_waking"); @@ -3409,11 +3415,19 @@ static int __cmd_record(int argc, const char **argv) rec_argv[i++] = strdup(schedstat_args[j]); for (j = 1; j < (unsigned int)argc; j++, i++) - rec_argv[i] = argv[j]; + rec_argv[i] = strdup(argv[j]); BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv); + memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc); + ret = cmd_record(rec_argc, rec_argv_copy); + + for (i = 0; i < rec_argc; i++) + free(rec_argv[i]); + free(rec_argv); + free(rec_argv_copy); + + return ret; } int cmd_sched(int argc, const char **argv) -- cgit v1.2.3 From 3126204ce3d9ab083cbdc2d61ab93746232eb89b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2022 05:56:12 -0700 Subject: perf docs: Update the documentation for the save_type filter Update the documentation to reflect the kernel changes. Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Ian Rogers Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220816125612.2042397-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6ec6d0ba0a72..0228efc96686 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -397,6 +397,9 @@ following filters are defined: - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches - save_type: save branch type during sampling in case binary is not available later + For the platforms with Intel Arch LBR support (12th-Gen+ client or + 4th-Gen Xeon+ server), the save branch type is unconditionally enabled + when the taken branch stack sampling is enabled. + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. -- cgit v1.2.3 From 48648548ef764dcb1f6ffc9c9f9057f7c610caa4 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 25 Aug 2022 09:54:58 +0800 Subject: perf stat: Capitalize topdown metrics' names Capitalize topdown metrics' names to follow the intel SDM. Before: # ./perf stat -a sleep 1 Performance counter stats for 'system wide': 228,094.05 msec cpu-clock # 225.026 CPUs utilized 842 context-switches # 3.691 /sec 224 cpu-migrations # 0.982 /sec 70 page-faults # 0.307 /sec 23,164,105 cycles # 0.000 GHz 29,403,446 instructions # 1.27 insn per cycle 5,268,185 branches # 23.097 K/sec 33,239 branch-misses # 0.63% of all branches 136,248,990 slots # 597.337 K/sec 32,976,450 topdown-retiring # 24.2% retiring 4,651,918 topdown-bad-spec # 3.4% bad speculation 26,148,695 topdown-fe-bound # 19.2% frontend bound 72,515,776 topdown-be-bound # 53.2% backend bound 6,008,540 topdown-heavy-ops # 4.4% heavy operations # 19.8% light operations 3,934,049 topdown-br-mispredict # 2.9% branch mispredict # 0.5% machine clears 16,655,439 topdown-fetch-lat # 12.2% fetch latency # 7.0% fetch bandwidth 41,635,972 topdown-mem-bound # 30.5% memory bound # 22.7% Core bound 1.013634593 seconds time elapsed After: # ./perf stat -a sleep 1 Performance counter stats for 'system wide': 228,081.94 msec cpu-clock # 225.003 CPUs utilized 824 context-switches # 3.613 /sec 224 cpu-migrations # 0.982 /sec 67 page-faults # 0.294 /sec 22,647,423 cycles # 0.000 GHz 28,870,551 instructions # 1.27 insn per cycle 5,167,099 branches # 22.655 K/sec 32,383 branch-misses # 0.63% of all branches 133,411,074 slots # 584.926 K/sec 32,352,607 topdown-retiring # 24.3% Retiring 4,456,977 topdown-bad-spec # 3.3% Bad Speculation 25,626,487 topdown-fe-bound # 19.2% Frontend Bound 70,955,316 topdown-be-bound # 53.2% Backend Bound 5,834,844 topdown-heavy-ops # 4.4% Heavy Operations # 19.9% Light Operations 3,738,781 topdown-br-mispredict # 2.8% Branch Mispredict # 0.5% Machine Clears 16,286,803 topdown-fetch-lat # 12.2% Fetch Latency # 7.0% Fetch Bandwidth 40,802,069 topdown-mem-bound # 30.6% Memory Bound # 22.6% Core Bound 1.013683125 seconds time elapsed Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220825015458.3252239-1-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 979c8cb918f7..788ce5e46470 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1193,7 +1193,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; - print_metric(config, ctxp, color, "%8.1f%%", "retiring", + print_metric(config, ctxp, color, "%8.1f%%", "Retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && full_td(cpu_map_idx, st, &rsd)) { @@ -1202,7 +1202,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", + print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && full_td(cpu_map_idx, st, &rsd)) { @@ -1211,7 +1211,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "backend bound", + print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && full_td(cpu_map_idx, st, &rsd)) { @@ -1220,7 +1220,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", + print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1234,13 +1234,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (retiring > 0.7 && heavy_ops > 0.1) color = PERF_COLOR_GREEN; - print_metric(config, ctxp, color, "%8.1f%%", "heavy operations", + print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations", heavy_ops * 100.); if (retiring > 0.7 && light_ops > 0.6) color = PERF_COLOR_GREEN; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "light operations", + print_metric(config, ctxp, color, "%8.1f%%", "Light Operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1254,13 +1254,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (bad_spec > 0.1 && br_mis > 0.05) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict", + print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict", br_mis * 100.); if (bad_spec > 0.1 && m_clears > 0.05) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "machine clears", + print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1274,13 +1274,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (fe_bound > 0.2 && fetch_lat > 0.15) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "fetch latency", + print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency", fetch_lat * 100.); if (fe_bound > 0.2 && fetch_bw > 0.1) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", + print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1294,13 +1294,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2 && mem_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "memory bound", + print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound", mem_bound * 100.); if (be_bound > 0.2 && core_bound > 0.1) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "Core bound", + print_metric(config, ctxp, color, "%8.1f%%", "Core Bound", core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, -- cgit v1.2.3 From 1bd3a383075c64d638e65d263c9267b08ee7733c Mon Sep 17 00:00:00 2001 From: Jean-Francois Le Fillatre Date: Wed, 24 Aug 2022 21:14:36 +0200 Subject: r8152: add PID for the Lenovo OneLink+ Dock The Lenovo OneLink+ Dock contains an RTL8153 controller that behaves as a broken CDC device by default. Add the custom Lenovo PID to the r8152 driver to support it properly. Also, systems compatible with this dock provide a BIOS option to enable MAC address passthrough (as per Lenovo document "ThinkPad Docking Solutions 2017"). Add the custom PID to the MAC passthrough list too. Tested on a ThinkPad 13 1st gen with the expected results: passthrough disabled: Invalid header when reading pass-thru MAC addr passthrough enabled: Using pass-thru MAC addr XX:XX:XX:XX:XX:XX Signed-off-by: Jean-Francois Le Fillatre Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 2de09ad5bac0..e11f70911acc 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -777,6 +777,13 @@ static const struct usb_device_id products[] = { }, #endif +/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d142ac8fcf6e..688905ea0a6d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -770,6 +770,7 @@ enum rtl8152_flags { RX_EPROTO, }; +#define DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK 0x3054 #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082 #define DEVICE_ID_THINKPAD_USB_C_DONGLE 0x720c #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387 @@ -9581,6 +9582,7 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) if (vendor_id == VENDOR_ID_LENOVO) { switch (product_id) { + case DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK: case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN3: @@ -9828,6 +9830,7 @@ static const struct usb_device_id rtl8152_table[] = { REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082), -- cgit v1.2.3 From d6ffe6067a54972564552ea45d320fb98db1ac5e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 16:43:51 -0400 Subject: provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/bitops.h | 7 ++----- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++----- arch/m68k/include/asm/bitops.h | 7 ++----- arch/s390/include/asm/bitops.h | 10 ++-------- arch/sh/include/asm/bitops-op32.h | 12 ++---------- 6 files changed, 25 insertions(+), 33 deletions(-) diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 492c7713ddae..bafb1c1f0fdc 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -283,11 +283,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /* * ffz = Find First Zero in word. Undefined if no zero exists, diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index da500471ac73..160d8f37fa1a 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -179,6 +179,21 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) return retval; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + /* * ffz - find first zero in word. * @word: The word to search diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 9f62af7fd7c4..1accb7842f58 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -331,11 +331,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & bit) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /** * ffz - find the first zero bit in a long word diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 470aed978590..e984af71df6b 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -157,11 +157,8 @@ arch___change_bit(unsigned long nr, volatile unsigned long *addr) change_bit(nr, addr); } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (addr[nr >> 5] & (1UL << (nr & 31))) != 0; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 9a7d15da966e..2de74fcd0578 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -176,14 +176,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return old & mask; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - const volatile unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - return *p & mask; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) diff --git a/arch/sh/include/asm/bitops-op32.h b/arch/sh/include/asm/bitops-op32.h index 565a85d8b7fb..5ace89b46507 100644 --- a/arch/sh/include/asm/bitops-op32.h +++ b/arch/sh/include/asm/bitops-op32.h @@ -135,16 +135,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -/** - * arch_test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include -- cgit v1.2.3 From a2d57ebec1e15f0ac256eb8397e82b07adfaaacc Mon Sep 17 00:00:00 2001 From: Kacper Michajłow Date: Sat, 27 Aug 2022 22:33:28 +0200 Subject: ALSA: hda/realtek: Add speaker AMP init for Samsung laptops with ALC298 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Magic initialization sequence was extracted from Windows driver and cleaned up manually. Fixes internal speakers output. Link: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1851518 Signed-off-by: Kacper Michajłow Cc: Link: https://lore.kernel.org/r/20220827203328.30363-1-kasper93@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 63 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 47e72cf76608..38930cf5aace 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4700,6 +4700,48 @@ static void alc236_fixup_hp_mute_led_micmute_vref(struct hda_codec *codec, alc236_fixup_hp_micmute_led_vref(codec, fix, action); } +static inline void alc298_samsung_write_coef_pack(struct hda_codec *codec, + const unsigned short coefs[2]) +{ + alc_write_coef_idx(codec, 0x23, coefs[0]); + alc_write_coef_idx(codec, 0x25, coefs[1]); + alc_write_coef_idx(codec, 0x26, 0xb011); +} + +struct alc298_samsung_amp_desc { + unsigned char nid; + unsigned short init_seq[2][2]; +}; + +static void alc298_fixup_samsung_amp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + int i, j; + static const unsigned short init_seq[][2] = { + { 0x19, 0x00 }, { 0x20, 0xc0 }, { 0x22, 0x44 }, { 0x23, 0x08 }, + { 0x24, 0x85 }, { 0x25, 0x41 }, { 0x35, 0x40 }, { 0x36, 0x01 }, + { 0x38, 0x81 }, { 0x3a, 0x03 }, { 0x3b, 0x81 }, { 0x40, 0x3e }, + { 0x41, 0x07 }, { 0x400, 0x1 } + }; + static const struct alc298_samsung_amp_desc amps[] = { + { 0x3a, { { 0x18, 0x1 }, { 0x26, 0x0 } } }, + { 0x39, { { 0x18, 0x2 }, { 0x26, 0x1 } } } + }; + + if (action != HDA_FIXUP_ACT_INIT) + return; + + for (i = 0; i < ARRAY_SIZE(amps); i++) { + alc_write_coef_idx(codec, 0x22, amps[i].nid); + + for (j = 0; j < ARRAY_SIZE(amps[i].init_seq); j++) + alc298_samsung_write_coef_pack(codec, amps[i].init_seq[j]); + + for (j = 0; j < ARRAY_SIZE(init_seq); j++) + alc298_samsung_write_coef_pack(codec, init_seq[j]); + } +} + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -7030,6 +7072,7 @@ enum { ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, + ALC298_FIXUP_SAMSUNG_AMP, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -8396,6 +8439,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led_micmute_vref, }, + [ALC298_FIXUP_SAMSUNG_AMP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_samsung_amp, + .chained = true, + .chain_id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET + }, [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9342,13 +9391,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), - SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), - SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), @@ -9716,7 +9765,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, - {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC298_FIXUP_SAMSUNG_AMP, .name = "alc298-samsung-amp"}, {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, -- cgit v1.2.3 From 5f3d9e8161bb8cb23ab3b4678cd13f6e90a06186 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Aug 2022 09:41:43 +0200 Subject: ALSA: usb-audio: Add quirk for LH Labs Geek Out HD Audio 1V5 The USB DAC from LH Labs (2522:0007) seems requiring the same quirk as Sony Walkman to set up the interface like UAC1; otherwise it gets the constant errors "usb_set_interface failed (-71)". This patch adds a quirk entry for addressing the buggy behavior. Reported-by: Lennert Van Alboom Cc: Link: https://lore.kernel.org/r/T3VPXtCc4uFws9Gfh2RjX6OdwM1RqfC6VqQr--_LMDyB2x5N3p9_q6AtPna17IXhHwBtcJVdXuS80ZZSCMjh_BafIbnzJPhbrkmhmWS6DlI=@vanalboom.org Link: https://lore.kernel.org/r/20220828074143.14736-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 168fd802d70b..9bfead5efc4c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1903,6 +1903,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ + QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ -- cgit v1.2.3 From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jul 2022 21:05:42 +0800 Subject: mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte In MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the page cache are installed in the ptes. But hugepage_add_new_anon_rmap is called for them mistakenly because they're not vm_shared. This will corrupt the page->mapping used by page cache code. Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Axel Rasmussen Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2480ba627aa5..e070b8593b37 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6041,7 +6041,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) goto out_release_unlock; - if (vm_shared) { + if (page_in_pagecache) { page_dup_file_rmap(page, true); } else { ClearHPageRestoreReserve(page); -- cgit v1.2.3 From 9dfb3b8d655022760ca68af11821f1c63aa547c3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 30 Jul 2022 05:25:18 +0100 Subject: shmem: update folio if shmem_replace_page() updates the page If we allocate a new page, we need to make sure that our folio matches that new page. If we do end up in this code path, we store the wrong page in the shmem inode's page cache, and I would rather imagine that data corruption ensues. This will be solved by changing shmem_replace_page() to shmem_replace_folio(), but this is the minimal fix. Link: https://lkml.kernel.org/r/20220730042518.1264767-1-willy@infradead.org Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/shmem.c b/mm/shmem.c index d075dd2dcc48..42e5888bf84d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1782,6 +1782,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_page(&page, gfp, info, index); + folio = page_folio(page); if (error) goto failed; } -- cgit v1.2.3 From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Mon, 1 Aug 2022 08:50:34 -0700 Subject: writeback: avoid use-after-free after removing device When a disk is removed, bdi_unregister gets called to stop further writeback and wait for associated delayed work to complete. However, wb_inode_writeback_end() may schedule bandwidth estimation dwork after this has completed, which can result in the timer attempting to access the just freed bdi_writeback. Fix this by checking if the bdi_writeback is alive, similar to when scheduling writeback work. Since this requires wb->work_lock, and wb_inode_writeback_end() may get called from interrupt, switch wb->work_lock to an irqsafe lock. Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload") Signed-off-by: Khazhismel Kumykov Reviewed-by: Jan Kara Cc: Michael Stapelberg Cc: Wu Fengguang Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton --- fs/fs-writeback.c | 12 ++++++------ mm/backing-dev.c | 10 +++++----- mm/page-writeback.c | 6 +++++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05221366a16d..08a1993ab7fd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode, static void wb_wakeup(struct bdi_writeback *wb) { - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct bdi_writeback *wb, @@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb, if (work->done) atomic_inc(&work->done->cnt); - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); @@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb, } else finish_writeback_work(wb, work); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } /** @@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); return work; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 95550b8fa7fe..de65cb1e5f76 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) queue_delayed_work(bdi_wq, &wb->dwork, timeout); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } static void wb_update_bandwidth_workfn(struct work_struct *work) @@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); static void wb_shutdown(struct bdi_writeback *wb) { /* Make sure nobody queues further work */ - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); return; } - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); cgwb_remove_from_bdi_list(wb); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d0d466a5c804..032a7bf8d259 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2892,6 +2892,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb) static void wb_inode_writeback_end(struct bdi_writeback *wb) { + unsigned long flags; atomic_dec(&wb->writeback_inodes); /* * Make sure estimate of writeback throughput gets updated after @@ -2900,7 +2901,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) * that if multiple inodes end writeback at a similar time, they get * batched into one bandwidth update. */ - queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); + spin_lock_irqsave(&wb->work_lock, flags); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); + spin_unlock_irqrestore(&wb->work_lock, flags); } bool __folio_end_writeback(struct folio *folio) -- cgit v1.2.3 From 6c26d17eea01477f9223c4d5da8ebc2099e4f027 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 4 Aug 2022 17:22:07 -0300 Subject: mailmap: update Guilherme G. Piccoli's email addresses Both @canonical and @ibm email addresses are invalid now; use my personal address instead. Link: https://lkml.kernel.org/r/20220804202207.439427-1-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 38255d412f0b..23241db2647f 100644 --- a/.mailmap +++ b/.mailmap @@ -150,6 +150,8 @@ Greg Kroah-Hartman Greg Kroah-Hartman Greg Kurz Gregory CLEMENT +Guilherme G. Piccoli +Guilherme G. Piccoli Guo Ren Guo Ren Gustavo Padovan -- cgit v1.2.3 From f09bddbd86619bf6213c96142a3b6b6a84818798 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 8 Aug 2022 13:54:10 -0700 Subject: vmcoreinfo: add kallsyms_num_syms symbol The rest of the kallsyms symbols are useless without knowing the number of symbols in the table. In an earlier patch, I somehow dropped the kallsyms_num_syms symbol, so add it back in. Link: https://lkml.kernel.org/r/20220808205410.18590-1-stephen.s.brennan@oracle.com Fixes: 5fd8fea935a1 ("vmcoreinfo: include kallsyms symbols") Signed-off-by: Stephen Brennan Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 07b26df453a9..a0eb4d5cf557 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -494,6 +494,7 @@ static int __init crash_save_vmcoreinfo_init(void) #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); + VMCOREINFO_SYMBOL(kallsyms_num_syms); VMCOREINFO_SYMBOL(kallsyms_token_table); VMCOREINFO_SYMBOL(kallsyms_token_index); #ifdef CONFIG_KALLSYMS_BASE_RELATIVE -- cgit v1.2.3 From fcab34b433e2c13e333b2f53c4a8409eadc432c7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 10 Aug 2022 10:53:59 -0600 Subject: mm: re-allow pinning of zero pfns (again) The below referenced commit makes the same error as 1c563432588d ("mm: fix is_pinnable_page against a cma page"), re-interpreting the logic to exclude pinning of the zero page, which breaks device assignment with vfio. To avoid further subtle mistakes, split the logic into discrete tests. [akpm@linux-foundation.org: simplify comment, per John] Link: https://lkml.kernel.org/r/166015037385.760108.16881097713975517242.stgit@omen Link: https://lore.kernel.org/all/165490039431.944052.12458624139225785964.stgit@omen Fixes: f25cbb7a95a2 ("mm: add zone device coherent type memory support") Signed-off-by: Alex Williamson Suggested-by: Matthew Wilcox Suggested-by: Felix Kuehling Tested-by: Slawomir Laba Reviewed-by: John Hubbard Cc: Alex Sierra Cc: Christoph Hellwig Cc: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 982f2607180b..21f8b27bd9fd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1544,9 +1544,16 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_device_coherent_page(page) || - is_zone_movable_page(page) || - is_zero_pfn(page_to_pfn(page))); + /* The zero page may always be pinned */ + if (is_zero_pfn(page_to_pfn(page))) + return true; + + /* Coherent device memory must always allow eviction. */ + if (is_device_coherent_page(page)) + return false; + + /* Otherwise, non-movable zone pages can be pinned. */ + return !is_zone_movable_page(page); } #else static inline bool is_longterm_pinnable_page(struct page *page) -- cgit v1.2.3 From 44e602b4e52f70f04620bbbf4fe46ecb40170bde Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Wed, 10 Aug 2022 16:02:25 +0000 Subject: binder_alloc: add missing mmap_lock calls when using the VMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take the mmap_read_lock() when using the VMA in binder_alloc_print_pages() and when checking for a VMA in binder_alloc_new_buf_locked(). It is worth noting binder_alloc_new_buf_locked() drops the VMA read lock after it verifies a VMA exists, but may be taken again deeper in the call stack, if necessary. Link: https://lkml.kernel.org/r/20220810160209.1630707-1-Liam.Howlett@oracle.com Fixes: a43cfc87caaf (android: binder: stop saving a pointer to the VMA) Signed-off-by: Liam R. Howlett Reported-by: Ondrej Mosnacek Reported-by: Acked-by: Carlos Llamas Tested-by: Ondrej Mosnacek Cc: Minchan Kim Cc: Christian Brauner (Microsoft) Cc: Greg Kroah-Hartman Cc: Hridya Valsaraju Cc: Joel Fernandes Cc: Martijn Coenen Cc: Suren Baghdasaryan Cc: Todd Kjos Cc: Matthew Wilcox (Oracle) Cc: "Arve Hjønnevåg" Signed-off-by: Andrew Morton --- drivers/android/binder_alloc.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1014beb12802..51f4e1c5cd01 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -402,12 +402,15 @@ static struct binder_buffer *binder_alloc_new_buf_locked( size_t size, data_offsets_size; int ret; + mmap_read_lock(alloc->vma_vm_mm); if (!binder_alloc_get_vma(alloc)) { + mmap_read_unlock(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); } + mmap_read_unlock(alloc->vma_vm_mm); data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); @@ -929,17 +932,25 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ - if (binder_alloc_get_vma(alloc) != NULL) { - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - page = &alloc->pages[i]; - if (!page->page_ptr) - free++; - else if (list_empty(&page->lru)) - active++; - else - lru++; - } + + mmap_read_lock(alloc->vma_vm_mm); + if (binder_alloc_get_vma(alloc) == NULL) { + mmap_read_unlock(alloc->vma_vm_mm); + goto uninitialized; } + + mmap_read_unlock(alloc->vma_vm_mm); + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + page = &alloc->pages[i]; + if (!page->page_ptr) + free++; + else if (list_empty(&page->lru)) + active++; + else + lru++; + } + +uninitialized: mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); -- cgit v1.2.3 From a5d2172180e8f94a8cfc7a7fa0243035629bf8d0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Aug 2022 14:09:06 +0900 Subject: mm/zsmalloc: do not attempt to free IS_ERR handle zsmalloc() now returns ERR_PTR values as handles, which zram accidentally can pass to zs_free(). Another bad scenario is when zcomp_compress() fails - handle has default -ENOMEM value, and zs_free() will try to free that "pointer value". Add the missing check and make sure that zs_free() bails out when ERR_PTR() is passed to it. Link: https://lkml.kernel.org/r/20220816050906.2583956-1-senozhatsky@chromium.org Fixes: c7e6f17b52e9 ("zsmalloc: zs_malloc: return ERR_PTR on failure") Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta , Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 34f784a1604b..907c9b1e1e61 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1487,7 +1487,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle) struct size_class *class; enum fullness_group fullness; - if (unlikely(!handle)) + if (IS_ERR_OR_NULL((void *)handle)) return; /* -- cgit v1.2.3 From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 17 Aug 2022 17:21:39 +0000 Subject: Revert "memcg: cleanup racy sum avoidance code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. Recently we started running the kernel with rstat infrastructure on production traffic and begin to see negative memcg stats values. Particularly the 'sock' stat is the one which we observed having negative value. $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 18446744073708724224 Re-run after couple of seconds $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 53248 For now we are only seeing this issue on large machines (256 CPUs) and only with 'sock' stat. I think the networking stack increase the stat on one cpu and decrease it on another cpu much more often. So, this negative sock is due to rstat flusher flushing the stats on the CPU that has seen the decrement of sock but missed the CPU that has increments. A typical race condition. For easy stable backport, revert is the most simple solution. For long term solution, I am thinking of two directions. First is just reduce the race window by optimizing the rstat flusher. Second is if the reader sees a negative stat value, force flush and restart the stat collection. Basically retry but limited. Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") Signed-off-by: Shakeel Butt Cc: "Michal Koutný" Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: David Hildenbrand Cc: Yosry Ahmed Cc: Greg Thelen Cc: [5.15] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4d31ce55b1c0..6257867fbf95 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -987,19 +987,30 @@ static inline void mod_memcg_page_state(struct page *page, static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { - return READ_ONCE(memcg->vmstats.state[idx]); + long x = READ_ONCE(memcg->vmstats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; + long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return READ_ONCE(pn->lruvec_stats.state[idx]); + x = READ_ONCE(pn->lruvec_stats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, -- cgit v1.2.3 From 550842cc60987b269e31b222283ade3e1b6c7fc8 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 15 Aug 2022 16:57:54 +0800 Subject: ocfs2: fix freeing uninitialized resource on ocfs2_dlm_shutdown After commit 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error"), any procedure after ocfs2_dlm_init() fails will trigger crash when calling ocfs2_dlm_shutdown(). ie: On local mount mode, no dlm resource is initialized. If ocfs2_mount_volume() fails in ocfs2_find_slot(), error handling will call ocfs2_dlm_shutdown(), then does dlm resource cleanup job, which will trigger kernel crash. This solution should bypass uninitialized resources in ocfs2_dlm_shutdown(). Link: https://lkml.kernel.org/r/20220815085754.20417-1-heming.zhao@suse.com Fixes: 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error") Signed-off-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/dlmglue.c | 8 +++++--- fs/ocfs2/super.c | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 801e60bab955..c28bc983a7b1 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); - ocfs2_cluster_disconnect(osb->cconn, hangup_pending); - osb->cconn = NULL; + if (osb->cconn) { + ocfs2_cluster_disconnect(osb->cconn, hangup_pending); + osb->cconn = NULL; - ocfs2_dlm_shutdown_debug(osb); + ocfs2_dlm_shutdown_debug(osb); + } } static int ocfs2_drop_lock(struct ocfs2_super *osb, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 013a727bd7c8..e2cc9eec287c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; - if (osb->cconn) - ocfs2_dlm_shutdown(osb, hangup_needed); + ocfs2_dlm_shutdown(osb, hangup_needed); ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove_recursive(osb->osb_debug_root); -- cgit v1.2.3 From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 19 Aug 2022 17:40:05 +0800 Subject: bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem The vmemmap pages is marked by kmemleak when allocated from memblock. Remove it from kmemleak when freeing the page. Otherwise, when we reuse the page, kmemleak may report such an error and then stop working. kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing) kmemleak: Kernel memory leak detector disabled kmemleak: Object 0xffff98fb6be00000 (size 335544320): kmemleak: comm "swapper", pid 0, jiffies 4294892296 kmemleak: min_count = 0 kmemleak: count = 0 kmemleak: flags = 0x1 kmemleak: checksum = 0 kmemleak: backtrace: Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page) Signed-off-by: Liu Shixin Reviewed-by: Muchun Song Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/bootmem_info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c index f18a631e7479..b1efebfcf94b 100644 --- a/mm/bootmem_info.c +++ b/mm/bootmem_info.c @@ -12,6 +12,7 @@ #include #include #include +#include void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { @@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page) ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); + kmemleak_free_part(page_to_virt(page), PAGE_SIZE); free_reserved_page(page); } } -- cgit v1.2.3 From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Fri, 19 Aug 2022 16:11:45 +0800 Subject: asm-generic: sections: refactor memory_intersects There are two problems with the current code of memory_intersects: First, it doesn't check whether the region (begin, end) falls inside the region (virt, vend), that is (virt < begin && vend > end). The second problem is if vend is equal to begin, it will return true but this is wrong since vend (virt + size) is not the last address of the memory region but (virt + size -1) is. The wrong determination will trigger the misreporting when the function check_for_illegal_area calls memory_intersects to check if the dma region intersects with stext region. The misreporting is as below (stext is at 0x80100000): WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] Modules linked in: CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 Hardware name: Xilinx Zynq Platform unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from __warn+0xb0/0x198 __warn from warn_slowpath_fmt+0x80/0xb4 warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 check_for_illegal_area from debug_dma_map_sg+0x94/0x368 debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 usb_stor_control_thread from kthread+0xf8/0x104 kthread from ret_from_fork+0x14/0x2c Refactor memory_intersects to fix the two problems above. Before the 1d7db834a027e ("dma-debug: use memory_intersects() directly"), memory_intersects is called only by printk_late_init: printk_late_init -> init_section_intersects ->memory_intersects. There were few places where memory_intersects was called. When commit 1d7db834a027e ("dma-debug: use memory_intersects() directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA subsystem uses it to check for an illegal area and the calltrace above is triggered. [akpm@linux-foundation.org: fix nearby comment typo] Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com Fixes: 979559362516 ("asm/sections: add helpers to check for section data") Signed-off-by: Quanyang Wang Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Thierry Reding Cc: Signed-off-by: Andrew Morton --- include/asm-generic/sections.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d0f7bdd2fdf2..db13bb620f52 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region - * @begin: virtual address of the beginning of the memory regien + * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object @@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, { void *vend = virt + size; - return (virt >= begin && virt < end) || (vend >= begin && vend < end); + if (virt < end && vend > begin) + return true; + + return false; } /** -- cgit v1.2.3 From ac733f6558ec86938e40433d88ec4e6148bac485 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Aug 2022 22:27:53 +0100 Subject: mailmap: update email address for Colin King Colin King is working on kernel janitorial fixes in his spare time and using his Intel email is confusing. Use his gmail account as the default email address. Link: https://lkml.kernel.org/r/20220817212753.101109-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton --- .mailmap | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 23241db2647f..984ddd1f6ad3 100644 --- a/.mailmap +++ b/.mailmap @@ -98,8 +98,7 @@ Christian Brauner Christian Marangi Christophe Ricard Christoph Hellwig -Colin Ian King -Colin Ian King +Colin Ian King Corey Minyard Damian Hobson-Garcia Daniel Borkmann -- cgit v1.2.3 From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 21 Aug 2022 18:08:53 +0000 Subject: mm/damon/dbgfs: avoid duplicate context directory creation When user tries to create a DAMON context via the DAMON debugfs interface with a name of an already existing context, the context directory creation fails but a new context is created and added in the internal data structure, due to absence of the directory creation success check. As a result, memory could leak and DAMON cannot be turned on. An example test case is as below: # cd /sys/kernel/debug/damon/ # echo "off" > monitor_on # echo paddr > target_ids # echo "abc" > mk_context # echo "abc" > mk_context # echo $$ > abc/target_ids # echo "on" > monitor_on <<< fails Return value of 'debugfs_create_dir()' is expected to be ignored in general, but this is an exceptional case as DAMON feature is depending on the debugfs functionality and it has the potential duplicate name issue. This commit therefore fixes the issue by checking the directory creation failure and immediately return the error in the case. Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts") Signed-off-by: Badari Pulavarty Signed-off-by: SeongJae Park Cc: [ 5.15.x] Signed-off-by: Andrew Morton --- mm/damon/dbgfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index cb8a7e9926a4..cfdf63132d5a 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -818,6 +818,9 @@ static int dbgfs_mk_context(char *name) return -ENOENT; new_dir = debugfs_create_dir(name, root); + /* Below check is required for a potential duplicated name case */ + if (IS_ERR(new_dir)) + return PTR_ERR(new_dir); dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; new_ctx = dbgfs_new_ctx(); -- cgit v1.2.3 From 1f13dff09ffc8bcf6aa20639b638d813379c7f6b Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 22 Aug 2022 22:54:30 +0100 Subject: squashfs: don't call kmalloc in decompressors The decompressors may be called while in an atomic section. So move the kmalloc() out of this path, and into the "page actor" init function. This fixes a regression introduced by commit f268eedddf35 ("squashfs: extend "page actor" to handle missing pages") Link: https://lkml.kernel.org/r/20220822215430.15933-1-phillip@squashfs.org.uk Fixes: f268eedddf35 ("squashfs: extend "page actor" to handle missing pages") Reported-by: Chris Murphy Signed-off-by: Phillip Lougher Cc: Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 2 +- fs/squashfs/file_direct.c | 2 +- fs/squashfs/page_actor.c | 34 +++++++++++++++------------------- fs/squashfs/page_actor.h | 5 +++++ 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 98e64fec75b7..e56510964b22 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -593,7 +593,7 @@ static void squashfs_readahead(struct readahead_control *ractl) res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res == expected) { int bytes; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index be4b12d31e0c..f1ccad519e28 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -74,7 +74,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res < 0) goto mark_errored; diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index b23b780d8f42..54b93bf4a25c 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -52,6 +52,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, actor->buffer = buffer; actor->pages = pages; actor->next_page = 0; + actor->tmp_buffer = NULL; actor->squashfs_first_page = cache_first_page; actor->squashfs_next_page = cache_next_page; actor->squashfs_finish_page = cache_finish_page; @@ -68,20 +69,9 @@ static void *handle_next_page(struct squashfs_page_actor *actor) if ((actor->next_page == actor->pages) || (actor->next_index != actor->page[actor->next_page]->index)) { - if (actor->alloc_buffer) { - void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (tmp_buffer) { - actor->tmp_buffer = tmp_buffer; - actor->next_index++; - actor->returned_pages++; - return tmp_buffer; - } - } - actor->next_index++; actor->returned_pages++; - return ERR_PTR(-ENOMEM); + return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM); } actor->next_index++; @@ -96,11 +86,10 @@ static void *direct_first_page(struct squashfs_page_actor *actor) static void *direct_next_page(struct squashfs_page_actor *actor) { - if (actor->pageaddr) + if (actor->pageaddr) { kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); - actor->pageaddr = actor->tmp_buffer = NULL; + actor->pageaddr = NULL; + } return handle_next_page(actor); } @@ -109,8 +98,6 @@ static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); } struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, @@ -121,6 +108,16 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ if (actor == NULL) return NULL; + if (msblk->decompressor->alloc_buffer) { + actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (actor->tmp_buffer == NULL) { + kfree(actor); + return NULL; + } + } else + actor->tmp_buffer = NULL; + actor->length = length ? : pages * PAGE_SIZE; actor->page = page; actor->pages = pages; @@ -128,7 +125,6 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ actor->returned_pages = 0; actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; - actor->tmp_buffer = NULL; actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 24841d28bc0f..95ffbb543d91 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -29,6 +29,11 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, extern struct squashfs_page_actor *squashfs_page_actor_init_special( struct squashfs_sb_info *msblk, struct page **page, int pages, int length); +static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor) +{ + kfree(actor->tmp_buffer); + kfree(actor); +} static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { return actor->squashfs_first_page(actor); -- cgit v1.2.3 From 3d2f78f08cd8388035ac375e731ec1ac1b79b09d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 23 Aug 2022 18:11:38 -0400 Subject: mm/mprotect: only reference swap pfn page if type match Yu Zhao reported a bug after the commit "mm/swap: Add swp_offset_pfn() to fetch PFN from swap entry" added a check in swp_offset_pfn() for swap type [1]: kernel BUG at include/linux/swapops.h:117! CPU: 46 PID: 5245 Comm: EventManager_De Tainted: G S O L 6.0.0-dbg-DEV #2 RIP: 0010:pfn_swap_entry_to_page+0x72/0xf0 Code: c6 48 8b 36 48 83 fe ff 74 53 48 01 d1 48 83 c1 08 48 8b 09 f6 c1 01 75 7b 66 90 48 89 c1 48 8b 09 f6 c1 01 74 74 5d c3 eb 9e <0f> 0b 48 ba ff ff ff ff 03 00 00 00 eb ae a9 ff 0f 00 00 75 13 48 RSP: 0018:ffffa59e73fabb80 EFLAGS: 00010282 RAX: 00000000ffffffe8 RBX: 0c00000000000000 RCX: ffffcd5440000000 RDX: 1ffffffffff7a80a RSI: 0000000000000000 RDI: 0c0000000000042b RBP: ffffa59e73fabb80 R08: ffff9965ca6e8bb8 R09: 0000000000000000 R10: ffffffffa5a2f62d R11: 0000030b372e9fff R12: ffff997b79db5738 R13: 000000000000042b R14: 0c0000000000042b R15: 1ffffffffff7a80a FS: 00007f549d1bb700(0000) GS:ffff99d3cf680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000440d035b3180 CR3: 0000002243176004 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: change_pte_range+0x36e/0x880 change_p4d_range+0x2e8/0x670 change_protection_range+0x14e/0x2c0 mprotect_fixup+0x1ee/0x330 do_mprotect_pkey+0x34c/0x440 __x64_sys_mprotect+0x1d/0x30 It triggers because pfn_swap_entry_to_page() could be called upon e.g. a genuine swap entry. Fix it by only calling it when it's a write migration entry where the page* is used. [1] https://lore.kernel.org/lkml/CAOUHufaVC2Za-p8m0aiHw6YkheDcrO-C3wRGixwDS32VTS+k1w@mail.gmail.com/ Link: https://lkml.kernel.org/r/20220823221138.45602-1-peterx@redhat.com Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive") Signed-off-by: Peter Xu Reported-by: Yu Zhao Tested-by: Yu Zhao Reviewed-by: David Hildenbrand Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton --- mm/mprotect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 3a23dde73723..bc6bddd156ca 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -196,10 +196,11 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, pages++; } else if (is_swap_pte(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); - struct page *page = pfn_swap_entry_to_page(entry); pte_t newpte; if (is_writable_migration_entry(entry)) { + struct page *page = pfn_swap_entry_to_page(entry); + /* * A protection check is difficult so * just be safe and disable write -- cgit v1.2.3 From 0ebafe2ea879e97fef6bd97f72303a6ccf39f092 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 26 Aug 2022 15:05:15 +0200 Subject: .mailmap: update Luca Ceresoli's e-mail address My Bootlin address is preferred from now on. Link: https://lkml.kernel.org/r/20220826130515.3011951-1-luca.ceresoli@bootlin.com Signed-off-by: Luca Ceresoli Cc: Arnd Bergmann Cc: Atish Patra Cc: Hans Verkuil Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 984ddd1f6ad3..8ded2e7c2906 100644 --- a/.mailmap +++ b/.mailmap @@ -254,6 +254,7 @@ Linus Lüssing Li Yang Li Yang Lorenzo Pieralisi +Luca Ceresoli Lukasz Luba Maciej W. Rozycki Maciej W. Rozycki -- cgit v1.2.3 From b90cb1053190353cc30f0fef0ef1f378ccc063c5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Aug 2022 15:05:29 -0700 Subject: Linux 6.0-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c7705f749601..952d354069a4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit v1.2.3 From f0da47118c7e93cdbbc6fb403dd729a5f2c90ee3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 26 Aug 2022 16:29:54 +0200 Subject: net: mac802154: Fix a condition in the receive path Upon reception, a packet must be categorized, either it's destination is the host, or it is another host. A packet with no destination addressing fields may be valid in two situations: - the packet has no source field: only ACKs are built like that, we consider the host as the destination. - the packet has a valid source field: it is directed to the PAN coordinator, as for know we don't have this information we consider we are not the PAN coordinator. There was likely a copy/paste error made during a previous cleanup because the if clause is now containing exactly the same condition as in the switch case, which can never be true. In the past the destination address was used in the switch and the source address was used in the if, which matches what the spec says. Cc: stable@vger.kernel.org Fixes: ae531b9475f6 ("ieee802154: use ieee802154_addr instead of *_sa variants") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220826142954.254853-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/mac802154/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index b8ce84618a55..c439125ef2b9 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -44,7 +44,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: - if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE) + if (hdr->source.mode != IEEE802154_ADDR_NONE) /* FIXME: check if we are PAN coordinator */ skb->pkt_type = PACKET_OTHERHOST; else -- cgit v1.2.3 From ffd7bdddaab193c38416fd5dd416d065517d266e Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Mon, 29 Aug 2022 15:12:59 +0800 Subject: ieee802154: cc2520: add rc code in cc2520_tx() The rc code is 0 at the error path "status & CC2520_STATUS_TX_UNDERFLOW". Assign rc code with '-EINVAL' at this error path to fix it. Signed-off-by: Li Qiong Link: https://lore.kernel.org/r/20220829071259.18330-1-liqiong@nfschina.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 1e1f40f628a0..c69b87d3837d 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -504,6 +504,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb) goto err_tx; if (status & CC2520_STATUS_TX_UNDERFLOW) { + rc = -EINVAL; dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n"); goto err_tx; } -- cgit v1.2.3 From 3f8ae9fe0409698799e173f698b714f34570b64b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 25 Aug 2022 13:36:44 +0200 Subject: net: dsa: xrs700x: Use irqsave variant for u64 stats update xrs700x_read_port_counters() updates the stats from a worker using the u64_stats_update_begin() version. This is okay on 32-UP since on the reader side preemption is disabled. On 32bit-SMP the writer can be preempted by the reader at which point the reader will spin on the seqcount until writer continues and completes the update. Assigning the mib_mutex mutex to the underlying seqcount would ensure proper synchronisation. The API for that on the u64_stats_init() side isn't available. Since it is the only user, just use disable interrupts during the update. Use u64_stats_update_begin_irqsave() on the writer side to ensure an uninterrupted update. Fixes: ee00b24f32eb8 ("net: dsa: add Arrow SpeedChips XRS700x driver") Cc: Andrew Lunn Cc: Florian Fainelli Cc: George McCollister Cc: Vivien Didelot Cc: Vladimir Oltean Signed-off-by: Sebastian Andrzej Siewior Acked-by: George McCollister Signed-off-by: David S. Miller --- drivers/net/dsa/xrs700x/xrs700x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 3887ed33c5fe..fa622639d640 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -109,6 +109,7 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port) { struct xrs700x_port *p = &priv->ports[port]; struct rtnl_link_stats64 stats; + unsigned long flags; int i; memset(&stats, 0, sizeof(stats)); @@ -138,9 +139,9 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port) */ stats.rx_packets += stats.multicast; - u64_stats_update_begin(&p->syncp); + flags = u64_stats_update_begin_irqsave(&p->syncp); p->stats64 = stats; - u64_stats_update_end(&p->syncp); + u64_stats_update_end_irqrestore(&p->syncp, flags); mutex_unlock(&p->mib_mutex); } -- cgit v1.2.3 From 278d3ba61563ceed3cb248383ced19e14ec7bc1f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 25 Aug 2022 13:36:45 +0200 Subject: net: Use u64_stats_fetch_begin_irq() for stats fetch. On 32bit-UP u64_stats_fetch_begin() disables only preemption. If the reader is in preemptible context and the writer side (u64_stats_update_begin*()) runs in an interrupt context (IRQ or softirq) then the writer can update the stats during the read operation. This update remains undetected. Use u64_stats_fetch_begin_irq() to ensure the stats fetch on 32bit-UP are not interrupted by a writer. 32bit-SMP remains unaffected by this change. Cc: "David S. Miller" Cc: Catherine Sullivan Cc: David Awogbemila Cc: Dimitris Michailidis Cc: Eric Dumazet Cc: Hans Ulli Kroll Cc: Jakub Kicinski Cc: Jeroen de Borst Cc: Johannes Berg Cc: Linus Walleij Cc: Paolo Abeni Cc: Simon Horman Cc: linux-arm-kernel@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Cc: oss-drivers@corigine.com Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 24 +++++++++++----------- drivers/net/ethernet/fungible/funeth/funeth_txrx.h | 4 ++-- drivers/net/ethernet/google/gve/gve_ethtool.c | 16 +++++++-------- drivers/net/ethernet/google/gve/gve_main.c | 12 +++++------ drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 ++-- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 ++-- .../net/ethernet/netronome/nfp/nfp_net_common.c | 8 ++++---- .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 ++++---- drivers/net/netdevsim/netdev.c | 4 ++-- net/mac80211/sta_info.c | 8 ++++---- net/mpls/af_mpls.c | 4 ++-- 11 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 9e6de2f968fa..6dae768671e3 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1919,7 +1919,7 @@ static void gmac_get_stats64(struct net_device *netdev, /* Racing with RX NAPI */ do { - start = u64_stats_fetch_begin(&port->rx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); stats->rx_packets = port->stats.rx_packets; stats->rx_bytes = port->stats.rx_bytes; @@ -1931,11 +1931,11 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_crc_errors = port->stats.rx_crc_errors; stats->rx_frame_errors = port->stats.rx_frame_errors; - } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); /* Racing with MIB and TX completion interrupts */ do { - start = u64_stats_fetch_begin(&port->ir_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); stats->tx_errors = port->stats.tx_errors; stats->tx_packets = port->stats.tx_packets; @@ -1945,15 +1945,15 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_missed_errors = port->stats.rx_missed_errors; stats->rx_fifo_errors = port->stats.rx_fifo_errors; - } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); /* Racing with hard_start_xmit */ do { - start = u64_stats_fetch_begin(&port->tx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); stats->tx_dropped = port->stats.tx_dropped; - } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); stats->rx_dropped += stats->rx_missed_errors; } @@ -2031,18 +2031,18 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, /* Racing with MIB interrupt */ do { p = values; - start = u64_stats_fetch_begin(&port->ir_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); for (i = 0; i < RX_STATS_NUM; i++) *p++ = port->hw_stats[i]; - } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); values = p; /* Racing with RX NAPI */ do { p = values; - start = u64_stats_fetch_begin(&port->rx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); for (i = 0; i < RX_STATUS_NUM; i++) *p++ = port->rx_stats[i]; @@ -2050,13 +2050,13 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *p++ = port->rx_csum_stats[i]; *p++ = port->rx_napi_exits; - } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); values = p; /* Racing with TX start_xmit */ do { p = values; - start = u64_stats_fetch_begin(&port->tx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); for (i = 0; i < TX_MAX_FRAGS; i++) { *values++ = port->tx_frag_stats[i]; @@ -2065,7 +2065,7 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *values++ = port->tx_frags_linearized; *values++ = port->tx_hw_csummed; - } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); } static int gmac_get_ksettings(struct net_device *netdev, diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h index 53b7e95213a8..671f51135c26 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -206,9 +206,9 @@ struct funeth_rxq { #define FUN_QSTAT_READ(q, seq, stats_copy) \ do { \ - seq = u64_stats_fetch_begin(&(q)->syncp); \ + seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ stats_copy = (q)->stats; \ - } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 50b384910c83..7b9a2d9d9624 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device *netdev, struct gve_rx_ring *rx = &priv->rx[ring]; start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; rx_bytes += tmp_rx_bytes; @@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device *netdev, if (priv->tx) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_pkts = priv->tx[ring].pkt_done; tmp_tx_bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); tx_pkts += tmp_tx_pkts; tx_bytes += tmp_tx_bytes; @@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->fill_cnt - rx->cnt; do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; data[i++] = rx->rx_cont_packet_cnt; @@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device *netdev, } do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_bytes = tx->bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 6cafee55efc3..044db3ebb071 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -51,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); packets = priv->rx[ring].rpackets; bytes = priv->rx[ring].rbytes; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); s->rx_packets += packets; s->rx_bytes += bytes; @@ -64,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); packets = priv->tx[ring].pkt_done; bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); s->tx_packets += packets; s->tx_bytes += bytes; @@ -1274,9 +1274,9 @@ void gve_handle_report_stats(struct gve_priv *priv) } do { - start = u64_stats_fetch_begin(&priv->tx[idx].statss); + start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); tx_bytes = priv->tx[idx].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_WAKE_CNT), .value = cpu_to_be64(priv->tx[idx].wake_queue), diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index a866bea65110..e5828a658caf 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&rxq_stats->syncp); + start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; stats->bytes = rxq_stats->bytes; stats->errors = rxq_stats->csum_errors + rxq_stats->other_errors; stats->csum_errors = rxq_stats->csum_errors; stats->other_errors = rxq_stats->other_errors; - } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 5051cdff2384..3b6c7b585737 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&txq_stats->syncp); + start = u64_stats_fetch_begin_irq(&txq_stats->syncp); stats->pkts = txq_stats->pkts; stats->bytes = txq_stats->bytes; stats->tx_busy = txq_stats->tx_busy; stats->tx_wake = txq_stats->tx_wake; stats->tx_dropped = txq_stats->tx_dropped; stats->big_frags_pkts = txq_stats->big_frags_pkts; - } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index cf4d6f1129fa..349a2b1a19a2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1630,21 +1630,21 @@ static void nfp_net_stat64(struct net_device *netdev, unsigned int start; do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); data[0] = r_vec->rx_pkts; data[1] = r_vec->rx_bytes; data[2] = r_vec->rx_drops; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); stats->rx_packets += data[0]; stats->rx_bytes += data[1]; stats->rx_dropped += data[2]; do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); data[0] = r_vec->tx_pkts; data[1] = r_vec->tx_bytes; data[2] = r_vec->tx_errors; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); stats->tx_packets += data[0]; stats->tx_bytes += data[1]; stats->tx_errors += data[2]; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index eeb1455a4e5d..b1b1b648e40c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -649,7 +649,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) unsigned int start; do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; @@ -657,10 +657,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; tmp[5] = nn->r_vecs[i].hw_tls_rx; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; tmp[6] = nn->r_vecs[i].hw_csum_tx; @@ -670,7 +670,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[10] = nn->r_vecs[i].hw_tls_tx; tmp[11] = nn->r_vecs[i].tls_tx_fallback; tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index e470e3398abc..9a1a5b203624 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&ns->syncp); + start = u64_stats_fetch_begin_irq(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; - } while (u64_stats_fetch_retry(&ns->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); } static int diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 330dab41f2fe..58998d821778 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2316,9 +2316,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, u64 value; do { - start = u64_stats_fetch_begin(&rxstats->syncp); + start = u64_stats_fetch_begin_irq(&rxstats->syncp); value = rxstats->msdu[tid]; - } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); return value; } @@ -2384,9 +2384,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) u64 value; do { - start = u64_stats_fetch_begin(&rxstats->syncp); + start = u64_stats_fetch_begin_irq(&rxstats->syncp); value = rxstats->bytes; - } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); return value; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35b5f806fdda..b52afe316dc4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, p = per_cpu_ptr(mdev->stats, i); do { - start = u64_stats_fetch_begin(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); local = p->stats; - } while (u64_stats_fetch_retry(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += local.rx_packets; stats->rx_bytes += local.rx_bytes; -- cgit v1.2.3 From dcf8e5633e2e69ad60b730ab5905608b756a032f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Aug 2022 12:59:25 -0700 Subject: tracing: Define the is_signed_type() macro once There are two definitions of the is_signed_type() macro: one in and a second definition in . As suggested by Linus, move the definition of the is_signed_type() macro into the header file. Change the definition of the is_signed_type() macro to make sure that it does not trigger any sparse warnings with future versions of sparse for bitwise types. Link: https://lore.kernel.org/all/CAHk-=whjH6p+qzwUdx5SOVVHjS3WvzJQr6mDUwhEyTf6pJWzaQ@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjQGnVfb4jehFR0XyZikdQvCZouE96xR_nnf5kqaM5qqQ@mail.gmail.com/ Cc: Rasmus Villemoes Cc: Steven Rostedt Acked-by: Kees Cook Signed-off-by: Bart Van Assche Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ include/linux/overflow.h | 1 - include/linux/trace_events.h | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 01ce94b58b42..7713d7bcdaea 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,6 +239,12 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * Whether 'type' is a signed type or an unsigned type. Supports scalar types, + * bool and also pointer types. + */ +#define is_signed_type(type) (((type)(-1)) < (__force type)1) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1221d11f8e5..0eb3b192f07a 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -30,7 +30,6 @@ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ -#define is_signed_type(type) (((type)(-1)) < (type)1) #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index b18759a673c6..8401dec93c15 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -814,8 +814,6 @@ extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < (type)1) - int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, -- cgit v1.2.3 From b05972f01e7d30419987a1f221b5593668fd6448 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 26 Aug 2022 09:39:30 +0800 Subject: net: sched: tbf: don't call qdisc_put() while holding tree lock The issue is the same to commit c2999f7fb05b ("net: sched: multiq: don't call qdisc_put() while holding tree lock"). Qdiscs call qdisc_put() while holding sch tree spinlock, which results sleeping-while-atomic BUG. Fixes: c266f64dbfa2 ("net: sched: protect block state with mutex") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220826013930.340121-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/sched/sch_tbf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 72102277449e..36079fdde2cb 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -356,6 +356,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; + struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; @@ -447,7 +448,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { qdisc_tree_flush_backlog(q->qdisc); - qdisc_put(q->qdisc); + old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; @@ -467,6 +468,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); + qdisc_put(old); err = 0; tbf_offload_change(sch); -- cgit v1.2.3 From f612466ebecb12a00d9152344ddda6f6345f04dc Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 26 Aug 2022 17:00:55 +0800 Subject: net/sched: fix netdevice reference leaks in attach_default_qdiscs() In attach_default_qdiscs(), if a dev has multiple queues and queue 0 fails to attach qdisc because there is no memory in attach_one_default_qdisc(). Then dev->qdisc will be noop_qdisc by default. But the other queues may be able to successfully attach to default qdisc. In this case, the fallback to noqueue process will be triggered. If the original attached qdisc is not released and a new one is directly attached, this will cause netdevice reference leaks. The following is the bug log: veth0: default qdisc (fq_codel) fail, fallback to noqueue unregister_netdevice: waiting for veth0 to become free. Usage count = 32 leaked reference. qdisc_alloc+0x12e/0x210 qdisc_create_dflt+0x62/0x140 attach_one_default_qdisc.constprop.41+0x44/0x70 dev_activate+0x128/0x290 __dev_open+0x12a/0x190 __dev_change_flags+0x1a2/0x1f0 dev_change_flags+0x23/0x60 do_setlink+0x332/0x1150 __rtnl_newlink+0x52f/0x8e0 rtnl_newlink+0x43/0x70 rtnetlink_rcv_msg+0x140/0x3b0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1bb/0x290 netlink_sendmsg+0x37c/0x4e0 sock_sendmsg+0x5f/0x70 ____sys_sendmsg+0x208/0x280 Fix this bug by clearing any non-noop qdiscs that may have been assigned before trying to re-attach. Fixes: bf6dba76d278 ("net: sched: fallback to qdisc noqueue if default qdisc setup fail") Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20220826090055.24424-1-wanghai38@huawei.com Signed-off-by: Paolo Abeni --- net/sched/sch_generic.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 99b697ad2b98..7a8ea03f673d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1122,6 +1122,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, } EXPORT_SYMBOL(dev_graft_qdisc); +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) +{ + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + dev_queue->qdisc_sleeping = qdisc_default; + + qdisc_put(qdisc); + } +} + static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) @@ -1169,6 +1184,7 @@ static void attach_default_qdiscs(struct net_device *dev) if (qdisc == &noop_qdisc) { netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", default_qdisc_ops->id, noqueue_qdisc_ops.id); + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); qdisc = txq->qdisc_sleeping; @@ -1447,21 +1463,6 @@ void dev_init_scheduler(struct net_device *dev) timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } -static void shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc_default) -{ - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; - struct Qdisc *qdisc_default = _qdisc_default; - - if (qdisc) { - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; - - qdisc_put(qdisc); - } -} - void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); -- cgit v1.2.3 From fce1c23f629173e0db78b79a74f2052044a00e65 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 23 Aug 2022 10:39:47 +0300 Subject: net: virtio_net: fix notification coalescing comments Fix wording in comments for the notifications coalescing feature. Signed-off-by: Alvaro Karsz Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220823073947.14774-1-alvaro.karsz@solid-run.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 29ced55514d4..6cb842ea8979 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ +#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -364,24 +364,24 @@ struct virtio_net_hash_config { */ #define VIRTIO_NET_CTRL_NOTF_COAL 6 /* - * Set the tx-usecs/tx-max-packets patameters. - * tx-usecs - Maximum number of usecs to delay a TX notification. - * tx-max-packets - Maximum number of packets to send before a TX notification. + * Set the tx-usecs/tx-max-packets parameters. */ struct virtio_net_ctrl_coal_tx { + /* Maximum number of packets to send before a TX notification */ __le32 tx_max_packets; + /* Maximum number of usecs to delay a TX notification */ __le32 tx_usecs; }; #define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 /* - * Set the rx-usecs/rx-max-packets patameters. - * rx-usecs - Maximum number of usecs to delay a RX notification. - * rx-max-frames - Maximum number of packets to receive before a RX notification. + * Set the rx-usecs/rx-max-packets parameters. */ struct virtio_net_ctrl_coal_rx { + /* Maximum number of packets to receive before a RX notification */ __le32 rx_max_packets; + /* Maximum number of usecs to delay a RX notification */ __le32 rx_usecs; }; -- cgit v1.2.3 From 4a4ce82212ef014d70f486a427005b2b5bab8e34 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 30 Aug 2022 08:40:55 +0200 Subject: net: phy: micrel: Make the GPIO to be non-exclusive The same GPIO line can be shared by multiple phys for the coma mode pin. If that is the case then, all the other phys that share the same line will failed to be probed because the access to the gpio line is not non-exclusive. Fix this by making access to the gpio line to be nonexclusive using flag GPIOD_FLAGS_BIT_NONEXCLUSIVE. This allows all the other PHYs to be probed. Fixes: 738871b09250ee ("net: phy: micrel: add coma mode GPIO") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20220830064055.2340403-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e78d0bf69bc3..6f52b4fb6888 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2873,12 +2873,18 @@ static int lan8814_config_init(struct phy_device *phydev) return 0; } +/* It is expected that there will not be any 'lan8814_take_coma_mode' + * function called in suspend. Because the GPIO line can be shared, so if one of + * the phys goes back in coma mode, then all the other PHYs will go, which is + * wrong. + */ static int lan8814_release_coma_mode(struct phy_device *phydev) { struct gpio_desc *gpiod; gpiod = devm_gpiod_get_optional(&phydev->mdio.dev, "coma-mode", - GPIOD_OUT_HIGH_OPEN_DRAIN); + GPIOD_OUT_HIGH_OPEN_DRAIN | + GPIOD_FLAGS_BIT_NONEXCLUSIVE); if (IS_ERR(gpiod)) return PTR_ERR(gpiod); -- cgit v1.2.3 From 642b2122c5df332a6df52dd1f91e552bc4356951 Mon Sep 17 00:00:00 2001 From: Gao Xiao Date: Mon, 29 Aug 2022 12:16:51 +0200 Subject: nfp: fix the access to management firmware hanging When running `ethtool -p` with the old management firmware, the management firmware resource is not correctly released, which causes firmware related malfunction: all the access to management firmware hangs. It releases the management firmware resource when set id mode operation is not supported. Fixes: ccb9bc1dfa44 ("nfp: add 'ethtool --identify' support") Signed-off-by: Gao Xiao Reviewed-by: Louis Peens Signed-off-by: Simon Horman Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20220829101651.633840-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index edd300033735..4cc38799eabc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -507,6 +507,7 @@ int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state) if (nfp_nsp_get_abi_ver_minor(nsp) < 32) { nfp_err(nfp_nsp_cpp(nsp), "set id mode operation not supported, please update flash\n"); + nfp_eth_config_cleanup_end(nsp); return -EOPNOTSUPP; } -- cgit v1.2.3 From 13a9d08c296228d18289de60b83792c586e1d073 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Aug 2022 18:00:30 +0300 Subject: net: lan966x: improve error handle in lan966x_fdma_rx_get_frame() Don't just print a warning. Clean up and return an error as well. Fixes: c8349639324a ("net: lan966x: Add FDMA functionality") Signed-off-by: Dan Carpenter Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/YwjgDm/SVd5c1tQU@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 6dea7f8c1481..51f8a0816377 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -425,7 +425,8 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) lan966x_ifh_get_src_port(skb->data, &src_port); lan966x_ifh_get_timestamp(skb->data, ×tamp); - WARN_ON(src_port >= lan966x->num_phys_ports); + if (WARN_ON(src_port >= lan966x->num_phys_ports)) + goto free_skb; skb->dev = lan966x->ports[src_port]->dev; skb_pull(skb, IFH_LEN * sizeof(u32)); @@ -449,6 +450,8 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) return skb; +free_skb: + kfree_skb(skb); unmap_page: dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr, FDMA_DCB_STATUS_BLOCKL(db->status), -- cgit v1.2.3 From c0955bf957be4bead01fae1d791476260da7325d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sat, 27 Aug 2022 23:38:15 +0800 Subject: ethernet: rocker: fix sleep in atomic context bug in neigh_timer_handler The function neigh_timer_handler() is a timer handler that runs in an atomic context. When used by rocker, neigh_timer_handler() calls "kzalloc(.., GFP_KERNEL)" that may sleep. As a result, the sleep in atomic context bug will happen. One of the processes is shown below: ofdpa_fib4_add() ... neigh_add_timer() (wait a timer) neigh_timer_handler() neigh_release() neigh_destroy() rocker_port_neigh_destroy() rocker_world_port_neigh_destroy() ofdpa_port_neigh_destroy() ofdpa_port_ipv4_neigh() kzalloc(sizeof(.., GFP_KERNEL) //may sleep This patch changes the gfp_t parameter of kzalloc() from GFP_KERNEL to GFP_ATOMIC in order to mitigate the bug. Fixes: 00fc0c51e35b ("rocker: Change world_ops API and implementation to be switchdev independant") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bc70c6abd6a5..58cf7cc54f40 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1273,7 +1273,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, bool removing; int err = 0; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; -- cgit v1.2.3 From ec1bd37123c607ca6485beb4542a792a4db765aa Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Thu, 18 Aug 2022 10:07:38 +0600 Subject: fscache: fix misdocumented parameter This patch fixes two warnings generated by make docs. The functions fscache_use_cookie and fscache_unuse_cookie, both have a parameter named cookie. But they are documented with the name "object" with unclear description. Which generates the warning when creating docs. This commit will replace the currently misdocumented parameter names with the correct ones while adding proper descriptions. CC: Randy Dunlap Signed-off-by: Khalid Masum Signed-off-by: David Howells Link: https://lore.kernel.org/r/20220521142446.4746-1-khalid.masum.92@gmail.com/ # v1 Link: https://lore.kernel.org/r/20220818040738.12036-1-khalid.masum.92@gmail.com/ # v2 Link: https://lore.kernel.org/r/880d7d25753fb326ee17ac08005952112fcf9bdb.1657360984.git.mchehab@kernel.org/ # Mauro's version --- include/linux/fscache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 720874e6ee94..36e5dd84cf59 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -258,7 +258,7 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, /** * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @will_modify: If cache is expected to be modified locally * * Request usage of the cookie attached to an object. The caller should tell @@ -274,7 +274,7 @@ static inline void fscache_use_cookie(struct fscache_cookie *cookie, /** * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @aux_data: Updated auxiliary data (or NULL) * @object_size: Revised size of the object (or NULL) * -- cgit v1.2.3 From c93ccd63b18c8d108c57b2bb0e5f3b058b9d2029 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Fri, 26 Aug 2022 10:35:15 +0800 Subject: cachefiles: fix error return code in cachefiles_ondemand_copen() The cache_size field of copen is specified by the user daemon. If cache_size < 0, then the OPEN request is expected to fail, while copen itself shall succeed. However, returning 0 is indeed unexpected when cache_size is an invalid error code. Fix this by returning error when cache_size is an invalid error code. Changes ======= v4: update the code suggested by Dan v3: update the commit log suggested by Jingbo. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Sun Ke Suggested-by: Jeffle Xu Suggested-by: Dan Carpenter Signed-off-by: David Howells Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220818111935.1683062-1-sunke32@huawei.com/ # v2 Link: https://lore.kernel.org/r/20220818125038.2247720-1-sunke32@huawei.com/ # v3 Link: https://lore.kernel.org/r/20220826023515.3437469-1-sunke32@huawei.com/ # v4 --- fs/cachefiles/ondemand.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 1fee702d5529..7e1586bd5cf3 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -158,9 +158,13 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) /* fail OPEN request if daemon reports an error */ if (size < 0) { - if (!IS_ERR_VALUE(size)) - size = -EINVAL; - req->error = size; + if (!IS_ERR_VALUE(size)) { + req->error = -EINVAL; + ret = -EINVAL; + } else { + req->error = size; + ret = 0; + } goto out; } -- cgit v1.2.3 From 1122f40072731525c06b1371cfa30112b9b54d27 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 25 Aug 2022 10:09:45 +0800 Subject: cachefiles: make on-demand request distribution fairer For now, enqueuing and dequeuing on-demand requests all start from idx 0, this makes request distribution unfair. In the weighty concurrent I/O scenario, the request stored in higher idx will starve. Searching requests cyclically in cachefiles_ondemand_daemon_read, makes distribution fairer. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Reported-by: Yongqing Li Signed-off-by: Xin Yin Signed-off-by: David Howells Reviewed-by: Jeffle Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20220817065200.11543-1-yinxin.x@bytedance.com/ # v1 Link: https://lore.kernel.org/r/20220825020945.2293-1-yinxin.x@bytedance.com/ # v2 --- fs/cachefiles/internal.h | 1 + fs/cachefiles/ondemand.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6cba2c6de2f9..2ad58c465208 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -111,6 +111,7 @@ struct cachefiles_cache { char *tag; /* cache binding tag */ refcount_t unbind_pincount;/* refcount to do daemon unbind */ struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; }; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 7e1586bd5cf3..0254ed39f68c 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -242,14 +242,19 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, unsigned long id = 0; size_t n; int ret = 0; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, &cache->reqs, cache->req_id_next); /* - * Search for a request that has not ever been processed, to prevent - * requests from being processed repeatedly. + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. */ xa_lock(&cache->reqs); req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); + } if (!req) { xa_unlock(&cache->reqs); return 0; @@ -264,6 +269,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, } xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; xa_unlock(&cache->reqs); id = xas.xa_index; -- cgit v1.2.3 From 3a1a274e933fca73fdc960cb1f60636cd285a265 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 26 Aug 2022 11:59:16 -0400 Subject: mlxbf_gige: compute MDIO period based on i1clk This patch adds logic to compute the MDIO period based on the i1clk, and thereafter write the MDIO period into the YU MDIO config register. The i1clk resource from the ACPI table is used to provide addressing to YU bootrecord PLL registers. The values in these registers are used to compute MDIO period. If the i1clk resource is not present in the ACPI table, then the current default hardcorded value of 430Mhz is used. The i1clk clock value of 430MHz is only accurate for boards with BF2 mid bin and main bin SoCs. The BF2 high bin SoCs have i1clk = 500MHz, but can support a slower MDIO period. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Reviewed-by: Asmaa Mnebhi Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20220826155916.12491-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 4 +- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c | 122 ++++++++++++++++++--- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h | 2 + 3 files changed, 110 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index 5fdf9b7179f5..5a1027b07215 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -75,6 +75,7 @@ struct mlxbf_gige { struct net_device *netdev; struct platform_device *pdev; void __iomem *mdio_io; + void __iomem *clk_io; struct mii_bus *mdiobus; spinlock_t lock; /* for packet processing indices */ u16 rx_q_entries; @@ -137,7 +138,8 @@ enum mlxbf_gige_res { MLXBF_GIGE_RES_MDIO9, MLXBF_GIGE_RES_GPIO0, MLXBF_GIGE_RES_LLU, - MLXBF_GIGE_RES_PLU + MLXBF_GIGE_RES_PLU, + MLXBF_GIGE_RES_CLK }; /* Version of register data returned by mlxbf_gige_get_regs() */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c index 2e6c1b7af096..85155cd9405c 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c @@ -22,10 +22,23 @@ #include #include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" #define MLXBF_GIGE_MDIO_GW_OFFSET 0x0 #define MLXBF_GIGE_MDIO_CFG_OFFSET 0x4 +#define MLXBF_GIGE_MDIO_FREQ_REFERENCE 156250000ULL +#define MLXBF_GIGE_MDIO_COREPLL_CONST 16384ULL +#define MLXBF_GIGE_MDC_CLK_NS 400 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG1 0x4 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG2 0x8 +#define MLXBF_GIGE_MDIO_CORE_F_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_F_MASK GENMASK(25, 0) +#define MLXBF_GIGE_MDIO_CORE_R_SHIFT 26 +#define MLXBF_GIGE_MDIO_CORE_R_MASK GENMASK(31, 26) +#define MLXBF_GIGE_MDIO_CORE_OD_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_OD_MASK GENMASK(3, 0) + /* Support clause 22 */ #define MLXBF_GIGE_MDIO_CL22_ST1 0x1 #define MLXBF_GIGE_MDIO_CL22_WRITE 0x1 @@ -50,27 +63,76 @@ #define MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK GENMASK(23, 16) #define MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK GENMASK(31, 24) +#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13)) + +#define MLXBF_GIGE_BF2_COREPLL_ADDR 0x02800c30 +#define MLXBF_GIGE_BF2_COREPLL_SIZE 0x0000000c + +static struct resource corepll_params[] = { + [MLXBF_GIGE_VERSION_BF2] = { + .start = MLXBF_GIGE_BF2_COREPLL_ADDR, + .end = MLXBF_GIGE_BF2_COREPLL_ADDR + MLXBF_GIGE_BF2_COREPLL_SIZE - 1, + .name = "COREPLL_RES" + }, +}; + +/* Returns core clock i1clk in Hz */ +static u64 calculate_i1clk(struct mlxbf_gige *priv) +{ + u8 core_od, core_r; + u64 freq_output; + u32 reg1, reg2; + u32 core_f; + + reg1 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG1); + reg2 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG2); + + core_f = (reg1 & MLXBF_GIGE_MDIO_CORE_F_MASK) >> + MLXBF_GIGE_MDIO_CORE_F_SHIFT; + core_r = (reg1 & MLXBF_GIGE_MDIO_CORE_R_MASK) >> + MLXBF_GIGE_MDIO_CORE_R_SHIFT; + core_od = (reg2 & MLXBF_GIGE_MDIO_CORE_OD_MASK) >> + MLXBF_GIGE_MDIO_CORE_OD_SHIFT; + + /* Compute PLL output frequency as follow: + * + * CORE_F / 16384 + * freq_output = freq_reference * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + */ + freq_output = div_u64((MLXBF_GIGE_MDIO_FREQ_REFERENCE * core_f), + MLXBF_GIGE_MDIO_COREPLL_CONST); + freq_output = div_u64(freq_output, (core_r + 1) * (core_od + 1)); + + return freq_output; +} + /* Formula for encoding the MDIO period. The encoded value is * passed to the MDIO config register. * - * mdc_clk = 2*(val + 1)*i1clk + * mdc_clk = 2*(val + 1)*(core clock in sec) * - * 400 ns = 2*(val + 1)*(((1/430)*1000) ns) + * i1clk is in Hz: + * 400 ns = 2*(val + 1)*(1/i1clk) * - * val = (((400 * 430 / 1000) / 2) - 1) + * val = (((400/10^9) / (1/i1clk) / 2) - 1) + * val = (400/2 * i1clk)/10^9 - 1 */ -#define MLXBF_GIGE_I1CLK_MHZ 430 -#define MLXBF_GIGE_MDC_CLK_NS 400 +static u8 mdio_period_map(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u64 i1clk; -#define MLXBF_GIGE_MDIO_PERIOD (((MLXBF_GIGE_MDC_CLK_NS * MLXBF_GIGE_I1CLK_MHZ / 1000) / 2) - 1) + i1clk = calculate_i1clk(priv); -#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, \ - MLXBF_GIGE_MDIO_PERIOD) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13)) + mdio_period = div_u64((MLXBF_GIGE_MDC_CLK_NS >> 1) * i1clk, 1000000000) - 1; + + return mdio_period; +} static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add, int phy_reg, u32 opcode) @@ -124,9 +186,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, int phy_reg, u16 val) { struct mlxbf_gige *priv = bus->priv; + u32 temp; u32 cmd; int ret; - u32 temp; if (phy_reg & MII_ADDR_C45) return -EOPNOTSUPP; @@ -144,18 +206,44 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, return ret; } +static void mlxbf_gige_mdio_cfg(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u32 val; + + mdio_period = mdio_period_map(priv); + + val = MLXBF_GIGE_MDIO_CFG_VAL; + val |= FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, mdio_period); + writel(val, priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET); +} + int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv) { struct device *dev = &pdev->dev; + struct resource *res; int ret; priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9); if (IS_ERR(priv->mdio_io)) return PTR_ERR(priv->mdio_io); - /* Configure mdio parameters */ - writel(MLXBF_GIGE_MDIO_CFG_VAL, - priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET); + /* clk resource shared with other drivers so cannot use + * devm_platform_ioremap_resource + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_CLK); + if (!res) { + /* For backward compatibility with older ACPI tables, also keep + * CLK resource internal to the driver. + */ + res = &corepll_params[MLXBF_GIGE_VERSION_BF2]; + } + + priv->clk_io = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(priv->clk_io)) + return PTR_ERR(priv->clk_io); + + mlxbf_gige_mdio_cfg(priv); priv->mdiobus = devm_mdiobus_alloc(dev); if (!priv->mdiobus) { diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 5fb33c9294bf..7be3a793984d 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -8,6 +8,8 @@ #ifndef __MLXBF_GIGE_REGS_H__ #define __MLXBF_GIGE_REGS_H__ +#define MLXBF_GIGE_VERSION 0x0000 +#define MLXBF_GIGE_VERSION_BF2 0x0 #define MLXBF_GIGE_STATUS 0x0010 #define MLXBF_GIGE_STATUS_READY BIT(0) #define MLXBF_GIGE_INT_STATUS 0x0028 -- cgit v1.2.3 From 8fc29ff3910f3af08a7c40a75d436b5720efe2bf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 27 Aug 2022 11:13:14 -0700 Subject: kcm: fix strp_init() order and cleanup strp_init() is called just a few lines above this csk->sk_user_data check, it also initializes strp->work etc., therefore, it is unnecessary to call strp_done() to cancel the freshly initialized work. And if sk_user_data is already used by KCM, psock->strp should not be touched, particularly strp->work state, so we need to move strp_init() after the csk->sk_user_data check. This also makes a lockdep warning reported by syzbot go away. Reported-and-tested-by: syzbot+9fc084a4348493ef65d2@syzkaller.appspotmail.com Reported-by: syzbot+e696806ef96cdd2d87cd@syzkaller.appspotmail.com Fixes: e5571240236c ("kcm: Check if sk_user_data already set in kcm_attach") Fixes: dff8baa26117 ("kcm: Call strp_stop before strp_done in kcm_attach") Cc: Tom Herbert Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20220827181314.193710-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 71899e5a5a11..1215c863e1c4 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1412,12 +1412,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - err = strp_init(&psock->strp, csk, &cb); - if (err) { - kmem_cache_free(kcm_psockp, psock); - goto out; - } - write_lock_bh(&csk->sk_callback_lock); /* Check if sk_user_data is already by KCM or someone else. @@ -1425,13 +1419,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, */ if (csk->sk_user_data) { write_unlock_bh(&csk->sk_callback_lock); - strp_stop(&psock->strp); - strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); err = -EALREADY; goto out; } + err = strp_init(&psock->strp, csk, &cb); + if (err) { + write_unlock_bh(&csk->sk_callback_lock); + kmem_cache_free(kcm_psockp, psock); + goto out; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; -- cgit v1.2.3 From 404a5ad72011f5bd2bb90f0a035be7635e2bd839 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Aug 2022 16:54:14 -0700 Subject: Documentation: networking: correct possessive "its" Change occurrences of "it's" that are possessive to "its" so that they don't read as "it is". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: Eric Dumazet Cc: Paolo Abeni Cc: Jiri Pirko Link: https://lore.kernel.org/r/20220829235414.17110-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/netdevsim.rst | 2 +- Documentation/networking/driver.rst | 2 +- Documentation/networking/ipvlan.rst | 2 +- Documentation/networking/l2tp.rst | 2 +- Documentation/networking/switchdev.rst | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 8a292fb5aaea..ec5e6d79b2e2 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -67,7 +67,7 @@ The ``netdevsim`` driver supports rate objects management, which includes: - setting tx_share and tx_max rate values for any rate object type; - setting parent node for any rate object type. -Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode. +Rate nodes and their parameters are exposed in ``netdevsim`` debugfs in RO mode. For example created rate node with name ``some_group``: .. code:: shell diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst index c8f59dbda46f..64f7236ff10b 100644 --- a/Documentation/networking/driver.rst +++ b/Documentation/networking/driver.rst @@ -8,7 +8,7 @@ Transmit path guidelines: 1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under any normal circumstances. It is considered a hard error unless - there is no way your device can tell ahead of time when it's + there is no way your device can tell ahead of time when its transmit function will become busy. Instead it must maintain the queue properly. For example, diff --git a/Documentation/networking/ipvlan.rst b/Documentation/networking/ipvlan.rst index 694adcba36b0..0000c1d383bc 100644 --- a/Documentation/networking/ipvlan.rst +++ b/Documentation/networking/ipvlan.rst @@ -11,7 +11,7 @@ Initial Release: ================ This is conceptually very similar to the macvlan driver with one major exception of using L3 for mux-ing /demux-ing among slaves. This property makes -the master device share the L2 with it's slave devices. I have developed this +the master device share the L2 with its slave devices. I have developed this driver in conjunction with network namespaces and not sure if there is use case outside of it. diff --git a/Documentation/networking/l2tp.rst b/Documentation/networking/l2tp.rst index 498b382d25a0..7f383e99dbad 100644 --- a/Documentation/networking/l2tp.rst +++ b/Documentation/networking/l2tp.rst @@ -530,7 +530,7 @@ its tunnel close actions. For L2TPIP sockets, the socket's close handler initiates the same tunnel close actions. All sessions are first closed. Each session drops its tunnel ref. When the tunnel ref reaches zero, the tunnel puts its socket ref. When the socket is -eventually destroyed, it's sk_destruct finally frees the L2TP tunnel +eventually destroyed, its sk_destruct finally frees the L2TP tunnel context. Sessions diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst index f1f4e6a85a29..bbf272e9d607 100644 --- a/Documentation/networking/switchdev.rst +++ b/Documentation/networking/switchdev.rst @@ -159,7 +159,7 @@ tools such as iproute2. The switchdev driver can know a particular port's position in the topology by monitoring NETDEV_CHANGEUPPER notifications. For example, a port moved into a -bond will see it's upper master change. If that bond is moved into a bridge, +bond will see its upper master change. If that bond is moved into a bridge, the bond's upper master will change. And so on. The driver will track such movements to know what position a port is in in the overall topology by registering for netdevice events and acting on NETDEV_CHANGEUPPER. -- cgit v1.2.3 From 5a3a59981027b53ec0f729ad76a43ce2b64ad968 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 29 Aug 2022 11:47:48 -0700 Subject: selftests: net: sort .gitignore file This is the result of `sort tools/testing/selftests/net/.gitignore`, but preserving the comment at the top. Suggested-by: Jakub Kicinski Signed-off-by: Axel Rasmussen Link: https://lore.kernel.org/r/20220829184748.1535580-1-axelrasmussen@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 50 +++++++++++++++++----------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0e5751af6247..de7d5cc15f85 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,42 +1,42 @@ # SPDX-License-Identifier: GPL-2.0-only +cmsg_sender +fin_ack_lat +gro +hwtstamp_config +ioam6_parser +ip_defrag ipsec +ipv6_flowlabel +ipv6_flowlabel_mgr msg_zerocopy -socket +nettest psock_fanout psock_snd psock_tpacket -stress_reuseport_listen +reuseaddr_conflict +reuseaddr_ports_exhausted reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack -reuseaddr_conflict -tcp_mmap -udpgso -udpgso_bench_rx -udpgso_bench_tx -tcp_inq -tls -txring_overwrite -ip_defrag -ipv6_flowlabel -ipv6_flowlabel_mgr -so_txtime -tcp_fastopen_backup_key -nettest -fin_ack_lat -reuseaddr_ports_exhausted -hwtstamp_config rxtimestamp -timestamping -txtimestamp +socket so_netns_cookie +so_txtime +stress_reuseport_listen +tap +tcp_fastopen_backup_key +tcp_inq +tcp_mmap test_unix_oob -gro -ioam6_parser +timestamping +tls toeplitz tun -cmsg_sender +txring_overwrite +txtimestamp +udpgso +udpgso_bench_rx +udpgso_bench_tx unix_connect -tap \ No newline at end of file -- cgit v1.2.3 From 90fabae8a2c225c4e4936723c38857887edde5cc Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 31 Aug 2022 11:21:03 +0200 Subject: sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the GSO splitting feature of sch_cake is enabled, GSO superpackets will be broken up and the resulting segments enqueued in place of the original skb. In this case, CAKE calls consume_skb() on the original skb, but still returns NET_XMIT_SUCCESS. This can confuse parent qdiscs into assuming the original skb still exists, when it really has been freed. Fix this by adding the __NET_XMIT_STOLEN flag to the return value in this case. Fixes: 0c850344d388 ("sch_cake: Conditionally split GSO segments") Signed-off-by: Toke Høiland-Jørgensen Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18231 Link: https://lore.kernel.org/r/20220831092103.442868-1-toke@toke.dk Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a43a58a73d09..a04928082e4a 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1713,6 +1713,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } idx--; flow = &b->flows[idx]; + ret = NET_XMIT_SUCCESS; /* ensure shaper state isn't stale */ if (!b->tin_backlog) { @@ -1771,6 +1772,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); + ret |= __NET_XMIT_STOLEN; } else { /* not splitting */ cobalt_set_enqueue_time(skb, now); @@ -1904,7 +1906,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } b->drop_overlimit += dropped; } - return NET_XMIT_SUCCESS; + return ret; } static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) -- cgit v1.2.3 From 2555283eb40df89945557273121e9393ef9b542b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 31 Aug 2022 19:06:00 +0200 Subject: mm/rmap: Fix anon_vma->degree ambiguity leading to double-reuse anon_vma->degree tracks the combined number of child anon_vmas and VMAs that use the anon_vma as their ->anon_vma. anon_vma_clone() then assumes that for any anon_vma attached to src->anon_vma_chain other than src->anon_vma, it is impossible for it to be a leaf node of the VMA tree, meaning that for such VMAs ->degree is elevated by 1 because of a child anon_vma, meaning that if ->degree equals 1 there are no VMAs that use the anon_vma as their ->anon_vma. This assumption is wrong because the ->degree optimization leads to leaf nodes being abandoned on anon_vma_clone() - an existing anon_vma is reused and no new parent-child relationship is created. So it is possible to reuse an anon_vma for one VMA while it is still tied to another VMA. This is an issue because is_mergeable_anon_vma() and its callers assume that if two VMAs have the same ->anon_vma, the list of anon_vmas attached to the VMAs is guaranteed to be the same. When this assumption is violated, vma_merge() can merge pages into a VMA that is not attached to the corresponding anon_vma, leading to dangling page->mapping pointers that will be dereferenced during rmap walks. Fix it by separately tracking the number of child anon_vmas and the number of VMAs using the anon_vma as their ->anon_vma. Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Cc: stable@kernel.org Acked-by: Michal Hocko Acked-by: Vlastimil Babka Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 7 +++++-- mm/rmap.c | 29 ++++++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf80adca980b..b89b4b86951f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -41,12 +41,15 @@ struct anon_vma { atomic_t refcount; /* - * Count of child anon_vmas and VMAs which points to this anon_vma. + * Count of child anon_vmas. Equals to the count of all anon_vmas that + * have ->parent pointing to this one, including itself. * * This counter is used for making decision about reusing anon_vma * instead of forking new one. See comments in function anon_vma_clone. */ - unsigned degree; + unsigned long num_children; + /* Count of VMAs whose ->anon_vma pointer points to this object. */ + unsigned long num_active_vmas; struct anon_vma *parent; /* Parent of this anon_vma */ diff --git a/mm/rmap.c b/mm/rmap.c index edc06c52bc82..93d5a6f793d2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -93,7 +93,8 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); - anon_vma->degree = 1; /* Reference for first vma */ + anon_vma->num_children = 0; + anon_vma->num_active_vmas = 0; anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called @@ -201,6 +202,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) goto out_enomem_free_avc; + anon_vma->num_children++; /* self-parent link for new root */ allocated = anon_vma; } @@ -210,8 +212,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); - /* vma reference or self-parent link for new root */ - anon_vma->degree++; + anon_vma->num_active_vmas++; allocated = NULL; avc = NULL; } @@ -296,19 +297,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) anon_vma_chain_link(dst, avc, anon_vma); /* - * Reuse existing anon_vma if its degree lower than two, - * that means it has no vma and only one anon_vma child. + * Reuse existing anon_vma if it has no vma and only one + * anon_vma child. * - * Do not choose parent anon_vma, otherwise first child - * will always reuse it. Root anon_vma is never reused: + * Root anon_vma is never reused: * it has self-parent reference and at least one child. */ if (!dst->anon_vma && src->anon_vma && - anon_vma != src->anon_vma && anon_vma->degree < 2) + anon_vma->num_children < 2 && + anon_vma->num_active_vmas == 0) dst->anon_vma = anon_vma; } if (dst->anon_vma) - dst->anon_vma->degree++; + dst->anon_vma->num_active_vmas++; unlock_anon_vma_root(root); return 0; @@ -358,6 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; + anon_vma->num_active_vmas++; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; @@ -378,7 +380,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); - anon_vma->parent->degree++; + anon_vma->parent->num_children++; anon_vma_unlock_write(anon_vma); return 0; @@ -410,7 +412,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * to free them outside the lock. */ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { - anon_vma->parent->degree--; + anon_vma->parent->num_children--; continue; } @@ -418,7 +420,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) anon_vma_chain_free(avc); } if (vma->anon_vma) { - vma->anon_vma->degree--; + vma->anon_vma->num_active_vmas--; /* * vma would still be needed after unlink, and anon_vma will be prepared @@ -436,7 +438,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; - VM_WARN_ON(anon_vma->degree); + VM_WARN_ON(anon_vma->num_children); + VM_WARN_ON(anon_vma->num_active_vmas); put_anon_vma(anon_vma); list_del(&avc->same_vma); -- cgit v1.2.3 From eb55dc09b5dd040232d5de32812cc83001a23da6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 29 Aug 2022 12:01:21 +0200 Subject: ip: fix triggering of 'icmp redirect' __mkroute_input() uses fib_validate_source() to trigger an icmp redirect. My understanding is that fib_validate_source() is used to know if the src address and the gateway address are on the same link. For that, fib_validate_source() returns 1 (same link) or 0 (not the same network). __mkroute_input() is the only user of these positive values, all other callers only look if the returned value is negative. Since the below patch, fib_validate_source() didn't return anymore 1 when both addresses are on the same network, because the route lookup returns RT_SCOPE_LINK instead of RT_SCOPE_HOST. But this is, in fact, right. Let's adapat the test to return 1 again when both addresses are on the same link. CC: stable@vger.kernel.org Fixes: 747c14307214 ("ip: fix dflt addr selection for connected nexthop") Reported-by: kernel test robot Reported-by: Heng Qi Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220829100121.3821-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f361d3d56be2..943edf4ad4db 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -389,7 +389,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = dev_match || (res.type == RTN_LOCAL && dev == net->loopback_dev); if (dev_match) { - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; return ret; } if (no_addr) @@ -401,7 +401,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; } return ret; -- cgit v1.2.3 From 52267ce25f60f37ae40ccbca0b21328ebae5ae75 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 30 Aug 2022 18:34:48 +0200 Subject: net: dsa: hellcreek: Print warning only once In case the source port cannot be decoded, print the warning only once. This still brings attention to the user and does not spam the logs at the same time. Signed-off-by: Kurt Kanzenbach Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220830163448.8921-1-kurt@linutronix.de Signed-off-by: Jakub Kicinski --- net/dsa/tag_hellcreek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index eb204ad36eee..846588c0070a 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) { - netdev_warn(dev, "Failed to get source port: %d\n", port); + netdev_warn_once(dev, "Failed to get source port: %d\n", port); return NULL; } -- cgit v1.2.3 From 8c70521238b7863c2af607e20bcba20f974c969b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2022 11:56:55 -0700 Subject: tcp: annotate data-race around challenge_timestamp challenge_timestamp can be read an written by concurrent threads. This was expected, but we need to annotate the race to avoid potential issues. Following patch moves challenge_timestamp and challenge_count to per-netns storage to provide better isolation. Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab5f0ea166f1..c184e15397a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3629,11 +3629,11 @@ static void tcp_send_challenge_ack(struct sock *sk) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != challenge_timestamp) { + if (now != READ_ONCE(challenge_timestamp)) { u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; - challenge_timestamp = now; + WRITE_ONCE(challenge_timestamp, now); WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); -- cgit v1.2.3 From 79e3602caa6f9d59c4f66a268407080496dae408 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2022 11:56:56 -0700 Subject: tcp: make global challenge ack rate limitation per net-ns and default disabled Because per host rate limiting has been proven problematic (side channel attacks can be based on it), per host rate limiting of challenge acks ideally should be per netns and turned off by default. This is a long due followup of following commits: 083ae308280d ("tcp: enable per-socket rate limiting of all 'challenge acks'") f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock") 75ff39ccc1bd ("tcp: make challenge acks less predictable") Signed-off-by: Eric Dumazet Cc: Jason Baron Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 5 ++++- include/net/netns/ipv4.h | 2 ++ net/ipv4/tcp_input.c | 21 +++++++++++---------- net/ipv4/tcp_ipv4.c | 6 ++++-- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 56cd4ea059b2..a759872a2883 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1035,7 +1035,10 @@ tcp_limit_output_bytes - INTEGER tcp_challenge_ack_limit - INTEGER Limits number of Challenge ACK sent per second, as recommended in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) - Default: 1000 + Note that this per netns rate limit can allow some side channel + attacks and probably should not be enabled. + TCP stack implements per TCP socket limits anyway. + Default: INT_MAX (unlimited) UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c7320ef356d9..6320a76cefdc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -179,6 +179,8 @@ struct netns_ipv4 { unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + u32 tcp_challenge_timestamp; + u32 tcp_challenge_count; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c184e15397a2..b85a9f755da4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3614,12 +3614,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); - u32 count, now; + u32 count, now, ack_limit; /* First check our per-socket dupack rate limit. */ if (__tcp_oow_rate_limited(net, @@ -3627,18 +3624,22 @@ static void tcp_send_challenge_ack(struct sock *sk) &tp->last_oow_ack_time)) return; + ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (ack_limit == INT_MAX) + goto send_ack; + /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != READ_ONCE(challenge_timestamp)) { - u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) { u32 half = (ack_limit + 1) >> 1; - WRITE_ONCE(challenge_timestamp, now); - WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); + WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now); + WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit)); } - count = READ_ONCE(challenge_count); + count = READ_ONCE(net->ipv4.tcp_challenge_count); if (count > 0) { - WRITE_ONCE(challenge_count, count - 1); + WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1); +send_ack: NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c83780dc9bf..5b019ba2b9d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3139,8 +3139,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tso_win_divisor = 3; /* Default TSQ limit of 16 TSO segments */ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536; - /* rfc5961 challenge ack rate limiting */ - net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + + /* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. */ + net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX; + net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */ net->ipv4.sysctl_tcp_min_rtt_wlen = 300; -- cgit v1.2.3 From 0b4f688d53fdc2a731b9d9cdf0c96255bc024ea6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2022 20:01:32 -0700 Subject: Revert "sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb" This reverts commit 90fabae8a2c225c4e4936723c38857887edde5cc. Patch was applied hastily, revert and let the v2 be reviewed. Fixes: 90fabae8a2c2 ("sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb") Link: https://lore.kernel.org/all/87wnao2ha3.fsf@toke.dk/ Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a04928082e4a..a43a58a73d09 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1713,7 +1713,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } idx--; flow = &b->flows[idx]; - ret = NET_XMIT_SUCCESS; /* ensure shaper state isn't stale */ if (!b->tin_backlog) { @@ -1772,7 +1771,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); - ret |= __NET_XMIT_STOLEN; } else { /* not splitting */ cobalt_set_enqueue_time(skb, now); @@ -1906,7 +1904,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } b->drop_overlimit += dropped; } - return ret; + return NET_XMIT_SUCCESS; } static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) -- cgit v1.2.3 From a8424a9b4522a3ab9f32175ad6d848739079071f Mon Sep 17 00:00:00 2001 From: Yacan Liu Date: Tue, 30 Aug 2022 23:23:14 +0800 Subject: net/smc: Remove redundant refcount increase For passive connections, the refcount increment has been done in smc_clcsock_accept()-->smc_sock_alloc(). Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Signed-off-by: Yacan Liu Reviewed-by: Tony Lu Link: https://lore.kernel.org/r/20220830152314.838736-1-liuyacan@corp.netease.com Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 79c1318af1fe..0939cc3b915a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1855,7 +1855,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc) { struct sock *newsmcsk = &new_smc->sk; - sk_refcnt_debug_inc(newsmcsk); if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; -- cgit v1.2.3 From 0495e337b7039191dfce6e03f5f830454b1fae6b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 12 Aug 2022 14:30:33 -0400 Subject: mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock A circular locking problem is reported by lockdep due to the following circular locking dependency. +--> cpu_hotplug_lock --> slab_mutex --> kn->active --+ | | +-----------------------------------------------------+ The forward cpu_hotplug_lock ==> slab_mutex ==> kn->active dependency happens in kmem_cache_destroy(): cpus_read_lock(); mutex_lock(&slab_mutex); ==> sysfs_slab_unlink() ==> kobject_del() ==> kernfs_remove() ==> __kernfs_remove() ==> kernfs_drain(): rwsem_acquire(&kn->dep_map, ...); The backward kn->active ==> cpu_hotplug_lock dependency happens in kernfs_fop_write_iter(): kernfs_get_active(); ==> slab_attr_store() ==> cpu_partial_store() ==> flush_all(): cpus_read_lock() One way to break this circular locking chain is to avoid holding cpu_hotplug_lock and slab_mutex while deleting the kobject in sysfs_slab_unlink() which should be equivalent to doing a write_lock and write_unlock pair of the kn->active virtual lock. Since the kobject structures are not protected by slab_mutex or the cpu_hotplug_lock, we can certainly release those locks before doing the delete operation. Move sysfs_slab_unlink() and sysfs_slab_release() to the newly created kmem_cache_release() and call it outside the slab_mutex & cpu_hotplug_lock critical sections. There will be a slight delay in the deletion of sysfs files if kmem_cache_release() is called indirectly from a work function. Fixes: 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context") Signed-off-by: Waiman Long Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Roman Gushchin Acked-by: David Rientjes Link: https://lore.kernel.org/all/YwOImVd+nRUsSAga@hyeyoo/ Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 17996649cfe3..07b948288f84 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -392,6 +392,28 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, } EXPORT_SYMBOL(kmem_cache_create); +#ifdef SLAB_SUPPORTS_SYSFS +/* + * For a given kmem_cache, kmem_cache_destroy() should only be called + * once or there will be a use-after-free problem. The actual deletion + * and release of the kobject does not need slab_mutex or cpu_hotplug_lock + * protection. So they are now done without holding those locks. + * + * Note that there will be a slight delay in the deletion of sysfs files + * if kmem_cache_release() is called indrectly from a work function. + */ +static void kmem_cache_release(struct kmem_cache *s) +{ + sysfs_slab_unlink(s); + sysfs_slab_release(s); +} +#else +static void kmem_cache_release(struct kmem_cache *s) +{ + slab_kmem_cache_release(s); +} +#endif + static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) { LIST_HEAD(to_destroy); @@ -418,11 +440,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) list_for_each_entry_safe(s, s2, &to_destroy, list) { debugfs_slab_release(s); kfence_shutdown_cache(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif + kmem_cache_release(s); } } @@ -437,20 +455,11 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); -#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { kfence_shutdown_cache(s); debugfs_slab_release(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif } return 0; @@ -465,14 +474,16 @@ void slab_kmem_cache_release(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s) { + int refcnt; + if (unlikely(!s) || !kasan_check_byte(s)) return; cpus_read_lock(); mutex_lock(&slab_mutex); - s->refcount--; - if (s->refcount) + refcnt = --s->refcount; + if (refcnt) goto out_unlock; WARN(shutdown_cache(s), @@ -481,6 +492,8 @@ void kmem_cache_destroy(struct kmem_cache *s) out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); + if (!refcnt && !(s->flags & SLAB_TYPESAFE_BY_RCU)) + kmem_cache_release(s); } EXPORT_SYMBOL(kmem_cache_destroy); -- cgit v1.2.3