From 19d7df69fdb2636856dc8919de72fc1bf8f79598 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 1 Feb 2018 08:49:23 +0100 Subject: xfrm: Refuse to insert 32 bit userspace socket policies on 64 bit systems We don't have a compat layer for xfrm, so userspace and kernel structures have different sizes in this case. This results in a broken configuration, so refuse to configure socket policies when trying to insert from 32 bit userspace as we do it already with policies inserted via netlink. Reported-and-tested-by: syzbot+e1a1577ca8bcb47b769a@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 54e21f19d722..f9d2f2233f09 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen struct xfrm_mgr *km; struct xfrm_policy *pol = NULL; +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + return -EOPNOTSUPP; +#endif + if (!optval && !optlen) { xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); -- cgit v1.2.3 From 0d3e45bc6507bd1f8728bf586ebd16c2d9e40613 Mon Sep 17 00:00:00 2001 From: Dong Bo Date: Fri, 26 Jan 2018 11:21:49 +0800 Subject: libata: Fix compile warning with ATA_DEBUG enabled This fixs the following comile warnings with ATA_DEBUG enabled, which detected by Linaro GCC 5.2-2015.11: drivers/ata/libata-scsi.c: In function 'ata_scsi_dump_cdb': ./include/linux/kern_levels.h:5:18: warning: format '%d' expects argument of type 'int', but argument 6 has type 'u64 {aka long long unsigned int}' [-Wformat=] tj: Patch hand-applied and description trimmed. Signed-off-by: Dong Bo Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 66be961c93a4..d959b154de4f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4282,7 +4282,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap, #ifdef ATA_DEBUG struct scsi_device *scsidev = cmd->device; - DPRINTK("CDB (%u:%d,%d,%d) %9ph\n", + DPRINTK("CDB (%u:%d,%d,%lld) %9ph\n", ap->print_id, scsidev->channel, scsidev->id, scsidev->lun, cmd->cmnd); -- cgit v1.2.3 From 3b61e5121d5c4d0ea79fe90ced8df2fe5cb67dc2 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 30 Jan 2018 11:02:55 +0100 Subject: ahci: Add check for device presence (PCIe hot unplug) in ahci_stop_engine() Exit directly with ENODEV, if the AHCI controller is not available anymore. Otherwise a delay of 500ms for each port is added to the remove function while trying to issue a command on the non-existent controller. Signed-off-by: Stefan Roese Cc: Tejun Heo Signed-off-by: Tejun Heo --- drivers/ata/libahci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index a0de7a38430c..7adcf3caabd0 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -665,6 +665,16 @@ int ahci_stop_engine(struct ata_port *ap) if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0) return 0; + /* + * Don't try to issue commands but return with ENODEV if the + * AHCI controller not available anymore (e.g. due to PCIe hot + * unplugging). Otherwise a 500ms delay for each port is added. + */ + if (tmp == 0xffffffff) { + dev_err(ap->host->dev, "AHCI controller unavailable!\n"); + return -ENODEV; + } + /* setting HBA to idle */ tmp &= ~PORT_CMD_START; writel(tmp, port_mmio + PORT_CMD); -- cgit v1.2.3 From 9f2b51db5b551085e26c8af5fbe484d62b891ec9 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 5 Feb 2018 13:50:36 +0200 Subject: ata: libahci: fix comment indentation Indent the numbered item with one space like all other items in the same list. Signed-off-by: Baruch Siach Signed-off-by: Tejun Heo --- drivers/ata/libahci_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 341d0ef82cbd..30cc8f1a31e1 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -340,7 +340,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port, * 2) regulator for controlling the targets power (optional) * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node, * or for non devicetree enabled platforms a single clock - * 4) phys (optional) + * 4) phys (optional) * * RETURNS: * The allocated ahci_host_priv on success, otherwise an ERR_PTR value -- cgit v1.2.3 From 058f58e235cbe03e923b30ea7c49995a46a8725f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:30:56 -0800 Subject: libata: fix length validation of ATAPI-relayed SCSI commands syzkaller reported a crash in ata_bmdma_fill_sg() when writing to /dev/sg1. The immediate cause was that the ATA command's scatterlist was not DMA-mapped, which causes 'pi - 1' to underflow, resulting in a write to 'qc->ap->bmdma_prd[0xffffffff]'. Strangely though, the flag ATA_QCFLAG_DMAMAP was set in qc->flags. The root cause is that when __ata_scsi_queuecmd() is preparing to relay a SCSI command to an ATAPI device, it doesn't correctly validate the CDB length before copying it into the 16-byte buffer 'cdb' in 'struct ata_queued_cmd'. Namely, it validates the fixed CDB length expected based on the SCSI opcode but not the actual CDB length, which can be larger due to the use of the SG_NEXT_CMD_LEN ioctl. Since 'flags' is the next member in ata_queued_cmd, a buffer overflow corrupts it. Fix it by requiring that the actual CDB length be <= 16 (ATAPI_CDB_LEN). [Really it seems the length should be required to be <= dev->cdb_len, but the current behavior seems to have been intentionally introduced by commit 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") to work around a userspace bug in mplayer. Probably the workaround is no longer needed (mplayer was fixed in 2007), but continuing to allow lengths to up 16 appears harmless for now.] Here's a reproducer that works in QEMU when /dev/sg1 refers to the CD-ROM drive that qemu-system-x86_64 creates by default: #include #include #include #define SG_NEXT_CMD_LEN 0x2283 int main() { char buf[53] = { [36] = 0x7e, [52] = 0x02 }; int fd = open("/dev/sg1", O_RDWR); ioctl(fd, SG_NEXT_CMD_LEN, &(int){ 17 }); write(fd, buf, sizeof(buf)); } The crash was: BUG: unable to handle kernel paging request at ffff8cb97db37ffc IP: ata_bmdma_fill_sg drivers/ata/libata-sff.c:2623 [inline] IP: ata_bmdma_qc_prep+0xa4/0xc0 drivers/ata/libata-sff.c:2727 PGD fb6c067 P4D fb6c067 PUD 0 Oops: 0002 [#1] SMP CPU: 1 PID: 150 Comm: syz_ata_bmdma_q Not tainted 4.15.0-next-20180202 #99 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 [...] Call Trace: ata_qc_issue+0x100/0x1d0 drivers/ata/libata-core.c:5421 ata_scsi_translate+0xc9/0x1a0 drivers/ata/libata-scsi.c:2024 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4326 [inline] ata_scsi_queuecmd+0x8c/0x210 drivers/ata/libata-scsi.c:4375 scsi_dispatch_cmd+0xa2/0xe0 drivers/scsi/scsi_lib.c:1727 scsi_request_fn+0x24c/0x530 drivers/scsi/scsi_lib.c:1865 __blk_run_queue_uncond block/blk-core.c:412 [inline] __blk_run_queue+0x3a/0x60 block/blk-core.c:432 blk_execute_rq_nowait+0x93/0xc0 block/blk-exec.c:78 sg_common_write.isra.7+0x272/0x5a0 drivers/scsi/sg.c:806 sg_write+0x1ef/0x340 drivers/scsi/sg.c:677 __vfs_write+0x31/0x160 fs/read_write.c:480 vfs_write+0xa7/0x160 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0x4d/0xc0 fs/read_write.c:581 do_syscall_64+0x5e/0x110 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x21/0x86 Fixes: 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") Reported-by: syzbot+1ff6f9fcc3c35f1c72a95e26528c8e7e3276e4da@syzkaller.appspotmail.com Cc: # v2.6.24+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d959b154de4f..9ae8986bae48 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4309,7 +4309,9 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { /* relay SCSI command to ATAPI device */ int len = COMMAND_SIZE(scsi_op); - if (unlikely(len > scmd->cmd_len || len > dev->cdb_len)) + if (unlikely(len > scmd->cmd_len || + len > dev->cdb_len || + scmd->cmd_len > ATAPI_CDB_LEN)) goto bad_cdb_len; xlat_func = atapi_xlat; -- cgit v1.2.3 From 9173e5e80729c8434b8d27531527c5245f4a5594 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:33:27 -0800 Subject: libata: remove WARN() for DMA or PIO command without data syzkaller hit a WARN() in ata_qc_issue() when writing to /dev/sg0. This happened because it issued a READ_6 command with no data buffer. Just remove the WARN(), as it doesn't appear indicate a kernel bug. The expected behavior is to fail the command, which the code does. Here's a reproducer that works in QEMU when /dev/sg0 refers to a disk of the default type ("82371SB PIIX3 IDE"): #include #include int main() { char buf[42] = { [36] = 0x8 /* READ_6 */ }; write(open("/dev/sg0", O_RDWR), buf, sizeof(buf)); } Fixes: f92a26365a72 ("libata: change ATA_QCFLAG_DMAMAP semantics") Reported-by: syzbot+f7b556d1766502a69d85071d2ff08bd87be53d0f@syzkaller.appspotmail.com Cc: # v2.6.25+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3c09122bf038..61b09968d032 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5401,8 +5401,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc) * We guarantee to LLDs that they will have at least one * non-zero sg if the command is a data command. */ - if (WARN_ON_ONCE(ata_is_data(prot) && - (!qc->sg || !qc->n_elem || !qc->nbytes))) + if (ata_is_data(prot) && (!qc->sg || !qc->n_elem || !qc->nbytes)) goto sys_err; if (ata_is_dma(prot) || (ata_is_pio(prot) && -- cgit v1.2.3 From 2c1ec6fda2d07044cda922ee25337cf5d4b429b3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:33:51 -0800 Subject: libata: don't try to pass through NCQ commands to non-NCQ devices syzkaller hit a WARN() in ata_bmdma_qc_issue() when writing to /dev/sg0. This happened because it issued an ATA pass-through command (ATA_16) where the protocol field indicated that NCQ should be used -- but the device did not support NCQ. We could just remove the WARN() from libata-sff.c, but the real problem seems to be that the SCSI -> ATA translation code passes through NCQ commands without verifying that the device actually supports NCQ. Fix this by adding the appropriate check to ata_scsi_pass_thru(). Here's reproducer that works in QEMU when /dev/sg0 refers to a disk of the default type ("82371SB PIIX3 IDE"): #include #include int main() { char buf[53] = { 0 }; buf[36] = 0x85; /* ATA_16 */ buf[37] = (12 << 1); /* FPDMA */ buf[38] = 0x1; /* Has data */ buf[51] = 0xC8; /* ATA_CMD_READ */ write(open("/dev/sg0", O_RDWR), buf, sizeof(buf)); } Fixes: ee7fb331c3ac ("libata: add support for NCQ commands for SG interface") Reported-by: syzbot+2f69ca28df61bdfc77cd36af2e789850355a221e@syzkaller.appspotmail.com Cc: # v4.4+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9ae8986bae48..89a9d4a2efc8 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3316,6 +3316,12 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } + /* We may not issue NCQ commands to devices not supporting NCQ */ + if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) { + fp = 1; + goto invalid_fld; + } + /* sanity check for pio multi commands */ if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) { fp = 1; -- cgit v1.2.3 From da77d76b95a0e8940793f4f7fe12a4a2d2048e39 Mon Sep 17 00:00:00 2001 From: Khiem Nguyen Date: Mon, 5 Feb 2018 04:18:51 +0900 Subject: sata_rcar: Reset SATA PHY when Salvator-X board resumes Because power of Salvator-X board is cut off in suspend, it needs to reset SATA PHY state in resume. Otherwise, SATA partition could not be accessed anymore. Signed-off-by: Khiem Nguyen Signed-off-by: Hien Dang [reinit phy in sata_rcar_resume() function on R-Car Gen3 only] [factor out SATA module init sequence] [fixed the prefix for the subject] Signed-off-by: Yoshihiro Kaneko Signed-off-by: Tejun Heo --- drivers/ata/sata_rcar.c | 63 +++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 80ee2f2a50d0..6f47ca34767d 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -146,6 +146,7 @@ enum sata_rcar_type { RCAR_GEN1_SATA, RCAR_GEN2_SATA, + RCAR_GEN3_SATA, RCAR_R8A7790_ES1_SATA, }; @@ -784,26 +785,11 @@ static void sata_rcar_setup_port(struct ata_host *host) ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2); } -static void sata_rcar_init_controller(struct ata_host *host) +static void sata_rcar_init_module(struct sata_rcar_priv *priv) { - struct sata_rcar_priv *priv = host->private_data; void __iomem *base = priv->base; u32 val; - /* reset and setup phy */ - switch (priv->type) { - case RCAR_GEN1_SATA: - sata_rcar_gen1_phy_init(priv); - break; - case RCAR_GEN2_SATA: - case RCAR_R8A7790_ES1_SATA: - sata_rcar_gen2_phy_init(priv); - break; - default: - dev_warn(host->dev, "SATA phy is not initialized\n"); - break; - } - /* SATA-IP reset state */ val = ioread32(base + ATAPI_CONTROL1_REG); val |= ATAPI_CONTROL1_RESET; @@ -824,10 +810,34 @@ static void sata_rcar_init_controller(struct ata_host *host) /* ack and mask */ iowrite32(0, base + SATAINTSTAT_REG); iowrite32(0x7ff, base + SATAINTMASK_REG); + /* enable interrupts */ iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG); } +static void sata_rcar_init_controller(struct ata_host *host) +{ + struct sata_rcar_priv *priv = host->private_data; + void __iomem *base = priv->base; + + /* reset and setup phy */ + switch (priv->type) { + case RCAR_GEN1_SATA: + sata_rcar_gen1_phy_init(priv); + break; + case RCAR_GEN2_SATA: + case RCAR_GEN3_SATA: + case RCAR_R8A7790_ES1_SATA: + sata_rcar_gen2_phy_init(priv); + break; + default: + dev_warn(host->dev, "SATA phy is not initialized\n"); + break; + } + + sata_rcar_init_module(priv); +} + static const struct of_device_id sata_rcar_match[] = { { /* Deprecated by "renesas,sata-r8a7779" */ @@ -856,7 +866,7 @@ static const struct of_device_id sata_rcar_match[] = { }, { .compatible = "renesas,sata-r8a7795", - .data = (void *)RCAR_GEN2_SATA + .data = (void *)RCAR_GEN3_SATA }, { .compatible = "renesas,rcar-gen2-sata", @@ -864,7 +874,7 @@ static const struct of_device_id sata_rcar_match[] = { }, { .compatible = "renesas,rcar-gen3-sata", - .data = (void *)RCAR_GEN2_SATA + .data = (void *)RCAR_GEN3_SATA }, { }, }; @@ -982,11 +992,18 @@ static int sata_rcar_resume(struct device *dev) if (ret) return ret; - /* ack and mask */ - iowrite32(0, base + SATAINTSTAT_REG); - iowrite32(0x7ff, base + SATAINTMASK_REG); - /* enable interrupts */ - iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG); + if (priv->type == RCAR_GEN3_SATA) { + sata_rcar_gen2_phy_init(priv); + sata_rcar_init_module(priv); + } else { + /* ack and mask */ + iowrite32(0, base + SATAINTSTAT_REG); + iowrite32(0x7ff, base + SATAINTMASK_REG); + + /* enable interrupts */ + iowrite32(ATAPI_INT_ENABLE_SATAINT, + base + ATAPI_INT_ENABLE_REG); + } ata_host_resume(host); -- cgit v1.2.3 From c53593e5cb693d59d9e8b64fb3a79436bf99c3b3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jan 2018 11:26:18 -0800 Subject: sched, cgroup: Don't reject lower cpu.max on ancestors While adding cgroup2 interface for the cpu controller, 0d5936344f30 ("sched: Implement interface for cgroup unified hierarchy") forgot to update input validation and left it to reject cpu.max config if any descendant has set a higher value. cgroup2 officially supports delegation and a descendant must not be able to restrict what its ancestors can configure. For absolute limits such as cpu.max and memory.max, this means that the config at each level should only act as the upper limit at that level and shouldn't interfere with what other cgroups can configure. This patch updates config validation on cgroup2 so that the cpu controller follows the same convention. Signed-off-by: Tejun Heo Fixes: 0d5936344f30 ("sched: Implement interface for cgroup unified hierarchy") Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org # v4.15+ --- kernel/sched/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bf724c1952ea..1bc6a694c84f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6678,13 +6678,18 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data) parent_quota = parent_b->hierarchical_quota; /* - * Ensure max(child_quota) <= parent_quota, inherit when no + * Ensure max(child_quota) <= parent_quota. On cgroup2, + * always take the min. On cgroup1, only inherit when no * limit is set: */ - if (quota == RUNTIME_INF) - quota = parent_quota; - else if (parent_quota != RUNTIME_INF && quota > parent_quota) - return -EINVAL; + if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) { + quota = min(quota, parent_quota); + } else { + if (quota == RUNTIME_INF) + quota = parent_quota; + else if (parent_quota != RUNTIME_INF && quota > parent_quota) + return -EINVAL; + } } cfs_b->hierarchical_quota = quota; -- cgit v1.2.3 From 685469e5bf9d31ccd9212be86f861a18fc213d05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Feb 2018 12:10:41 -0800 Subject: percpu: add Dennis Zhou as a percpu co-maintainer Dennis rewrote the percpu area allocator some months ago, understands most of the code base and has been responsive with the bug reports and questions. Let's add him as a co-maintainer. Signed-off-by: Tejun Heo Acked-by: Christopher Lameter --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260e36b7..f384e22546d3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10832,6 +10832,7 @@ F: drivers/platform/x86/peaq-wmi.c PER-CPU MEMORY ALLOCATOR M: Tejun Heo M: Christoph Lameter +M: Dennis Zhou T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git S: Maintained F: include/linux/percpu*.h -- cgit v1.2.3 From 2471c98165494173a3cd03231b216b909c063e41 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 1 Feb 2018 11:26:12 +0100 Subject: xfrm: Fix policy hold queue after flowcache removal. Now that the flowcache is removed we need to generate a new dummy bundle every time we check if the needed SAs are in place because the dummy bundle is not cached anymore. Fix it by passing the XFRM_LOOKUP_QUEUE flag to xfrm_lookup(). This makes sure that we get a dummy bundle in case the SAs are not yet in place. Fixes: 3ca28286ea80 ("xfrm_policy: bypass flow_cache_lookup") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a23078132cf..8b3811ff002d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1891,7 +1891,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); - dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0); + dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) goto purge_queue; -- cgit v1.2.3 From d97ca5d714a5334aecadadf696875da40f1fbf3e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Feb 2018 14:42:01 +0100 Subject: xfrm_user: uncoditionally validate esn replay attribute struct The sanity test added in ecd7918745234 can be bypassed, validation only occurs if XFRM_STATE_ESN flag is set, but rest of code doesn't care and just checks if the attribute itself is present. So always validate. Alternative is to reject if we have the attribute without the flag but that would change abi. Reported-by: syzbot+0ab777c27d2bb7588f73@syzkaller.appspotmail.com Cc: Mathias Krause Fixes: ecd7918745234 ("xfrm_user: ensure user supplied esn replay window is valid") Fixes: d8647b79c3b7e ("xfrm: Add user interface for esn and big anti-replay windows") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7f52b8eb177d..080035f056d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; struct xfrm_replay_state_esn *rs; - if (p->flags & XFRM_STATE_ESN) { - if (!rt) - return -EINVAL; + if (!rt) + return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0; - rs = nla_data(rt); + rs = nla_data(rt); - if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) - return -EINVAL; - - if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && - nla_len(rt) != sizeof(*rs)) - return -EINVAL; - } + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; - if (!rt) - return 0; + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; /* As only ESP and AH support ESN feature. */ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) -- cgit v1.2.3 From b1c7fe26e0497386d3ae2e2404d1fa7f93895405 Mon Sep 17 00:00:00 2001 From: Aishwarya Pant Date: Tue, 13 Feb 2018 16:51:59 +0530 Subject: libata: transport: cleanup documentation of sysfs interface Clean-up the documentation of sysfs interfaces to be in the same format as described in Documentation/ABI/README. This will be useful for tracking changes in the ABI. Attributes are grouped by function (device, link or port) and then by date added. This patch also adds documentation for one attribute - /sys/class/ata_port/ataX/port_no Signed-off-by: Aishwarya Pant Signed-off-by: Tejun Heo --- Documentation/ABI/testing/sysfs-ata | 171 +++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 71 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index aa4296498859..9ab0ef1dd1c7 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -1,110 +1,139 @@ What: /sys/class/ata_... -Date: August 2008 -Contact: Gwendal Grignou Description: - -Provide a place in sysfs for storing the ATA topology of the system. This allows -retrieving various information about ATA objects. + Provide a place in sysfs for storing the ATA topology of the + system. This allows retrieving various information about ATA + objects. Files under /sys/class/ata_port ------------------------------- - For each port, a directory ataX is created where X is the ata_port_id of - the port. The device parent is the ata host device. +For each port, a directory ataX is created where X is the ata_port_id of the +port. The device parent is the ata host device. -idle_irq (read) - Number of IRQ received by the port while idle [some ata HBA only]. +What: /sys/class/ata_port/ataX/nr_pmp_links +What: /sys/class/ata_port/ataX/idle_irq +Date: May, 2010 +KernelVersion: v2.6.37 +Contact: Gwendal Grignou +Description: + nr_pmp_links: (RO) If a SATA Port Multiplier (PM) is + connected, the number of links behind it. -nr_pmp_links (read) + idle_irq: (RO) Number of IRQ received by the port while + idle [some ata HBA only]. - If a SATA Port Multiplier (PM) is connected, number of link behind it. + +What: /sys/class/ata_port/ataX/port_no +Date: May, 2013 +KernelVersion: v3.11 +Contact: Gwendal Grignou +Description: + (RO) Host local port number. While registering host controller, + port numbers are tracked based upon number of ports available on + the controller. This attribute is needed by udev for composing + persistent links in /dev/disk/by-path. Files under /sys/class/ata_link ------------------------------- - Behind each port, there is a ata_link. If there is a SATA PM in the - topology, 15 ata_link objects are created. - - If a link is behind a port, the directory name is linkX, where X is - ata_port_id of the port. - If a link is behind a PM, its name is linkX.Y where X is ata_port_id - of the parent port and Y the PM port. +Behind each port, there is a ata_link. If there is a SATA PM in the topology, 15 +ata_link objects are created. -hw_sata_spd_limit +If a link is behind a port, the directory name is linkX, where X is ata_port_id +of the port. If a link is behind a PM, its name is linkX.Y where X is +ata_port_id of the parent port and Y the PM port. - Maximum speed supported by the connected SATA device. -sata_spd_limit +What: /sys/class/ata_link/linkX[.Y]/hw_sata_spd_limit +What: /sys/class/ata_link/linkX[.Y]/sata_spd_limit +What: /sys/class/ata_link/linkX[.Y]/sata_spd +Date: May, 2010 +KernelVersion: v2.6.37 +Contact: Gwendal Grignou +Description: + hw_sata_spd_limit: (RO) Maximum speed supported by the + connected SATA device. - Maximum speed imposed by libata. + sata_spd_limit: (RO) Maximum speed imposed by libata. -sata_spd + sata_spd: (RO) Current speed of the link + eg. 1.5, 3 Gbps etc. - Current speed of the link [1.5, 3Gps,...]. Files under /sys/class/ata_device --------------------------------- - Behind each link, up to two ata device are created. - The name of the directory is devX[.Y].Z where: - - X is ata_port_id of the port where the device is connected, - - Y the port of the PM if any, and - - Z the device id: for PATA, there is usually 2 devices [0,1], - only 1 for SATA. - -class - Device class. Can be "ata" for disk, "atapi" for packet device, - "pmp" for PM, or "none" if no device was found behind the link. - -dma_mode +Behind each link, up to two ata devices are created. +The name of the directory is devX[.Y].Z where: +- X is ata_port_id of the port where the device is connected, +- Y the port of the PM if any, and +- Z the device id: for PATA, there is usually 2 devices [0,1], only 1 for SATA. + + +What: /sys/class/ata_device/devX[.Y].Z/spdn_cnt +What: /sys/class/ata_device/devX[.Y].Z/gscr +What: /sys/class/ata_device/devX[.Y].Z/ering +What: /sys/class/ata_device/devX[.Y].Z/id +What: /sys/class/ata_device/devX[.Y].Z/pio_mode +What: /sys/class/ata_device/devX[.Y].Z/xfer_mode +What: /sys/class/ata_device/devX[.Y].Z/dma_mode +What: /sys/class/ata_device/devX[.Y].Z/class +Date: May, 2010 +KernelVersion: v2.6.37 +Contact: Gwendal Grignou +Description: + spdn_cnt: (RO) Number of times libata decided to lower the + speed of link due to errors. - Transfer modes supported by the device when in DMA mode. - Mostly used by PATA device. + gscr: (RO) Cached result of the dump of PM GSCR + register. Valid registers are: -pio_mode + 0: SATA_PMP_GSCR_PROD_ID, + 1: SATA_PMP_GSCR_REV, + 2: SATA_PMP_GSCR_PORT_INFO, + 32: SATA_PMP_GSCR_ERROR, + 33: SATA_PMP_GSCR_ERROR_EN, + 64: SATA_PMP_GSCR_FEAT, + 96: SATA_PMP_GSCR_FEAT_EN, + 130: SATA_PMP_GSCR_SII_GPIO - Transfer modes supported by the device when in PIO mode. - Mostly used by PATA device. + Only valid if the device is a PM. -xfer_mode + ering: (RO) Formatted output of the error ring of the + device. - Current transfer mode. + id: (RO) Cached result of IDENTIFY command, as + described in ATA8 7.16 and 7.17. Only valid if + the device is not a PM. -id + pio_mode: (RO) Transfer modes supported by the device when + in PIO mode. Mostly used by PATA device. - Cached result of IDENTIFY command, as described in ATA8 7.16 and 7.17. - Only valid if the device is not a PM. + xfer_mode: (RO) Current transfer mode -gscr + dma_mode: (RO) Transfer modes supported by the device when + in DMA mode. Mostly used by PATA device. - Cached result of the dump of PM GSCR register. - Valid registers are: - 0: SATA_PMP_GSCR_PROD_ID, - 1: SATA_PMP_GSCR_REV, - 2: SATA_PMP_GSCR_PORT_INFO, - 32: SATA_PMP_GSCR_ERROR, - 33: SATA_PMP_GSCR_ERROR_EN, - 64: SATA_PMP_GSCR_FEAT, - 96: SATA_PMP_GSCR_FEAT_EN, - 130: SATA_PMP_GSCR_SII_GPIO - Only valid if the device is a PM. + class: (RO) Device class. Can be "ata" for disk, + "atapi" for packet device, "pmp" for PM, or + "none" if no device was found behind the link. -trim - Shows the DSM TRIM mode currently used by the device. Valid - values are: - unsupported: Drive does not support DSM TRIM - unqueued: Drive supports unqueued DSM TRIM only - queued: Drive supports queued DSM TRIM - forced_unqueued: Drive's queued DSM support is known to be - buggy and only unqueued TRIM commands - are sent +What: /sys/class/ata_device/devX[.Y].Z/trim +Date: May, 2015 +KernelVersion: v4.10 +Contact: Gwendal Grignou +Description: + (RO) Shows the DSM TRIM mode currently used by the device. Valid + values are: -spdn_cnt + unsupported: Drive does not support DSM TRIM - Number of time libata decided to lower the speed of link due to errors. + unqueued: Drive supports unqueued DSM TRIM only -ering + queued: Drive supports queued DSM TRIM - Formatted output of the error ring of the device. + forced_unqueued: Drive's queued DSM support is known to + be buggy and only unqueued TRIM commands + are sent -- cgit v1.2.3 From 8f8ca51dbb4da0457f57f83d94aea81931b0707a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Feb 2018 13:43:23 +0100 Subject: ata: sata_rcar: Remove unused variable in sata_rcar_init_controller() drivers/ata/sata_rcar.c: In function 'sata_rcar_init_controller': drivers/ata/sata_rcar.c:821:8: warning: unused variable 'base' [-Wunused-variable] Fixes: da77d76b95a0e894 ("sata_rcar: Reset SATA PHY when Salvator-X board resumes") Signed-off-by: Geert Uytterhoeven Signed-off-by: Tejun Heo --- drivers/ata/sata_rcar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 6f47ca34767d..6456e07db72a 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -818,7 +818,6 @@ static void sata_rcar_init_module(struct sata_rcar_priv *priv) static void sata_rcar_init_controller(struct ata_host *host) { struct sata_rcar_priv *priv = host->private_data; - void __iomem *base = priv->base; /* reset and setup phy */ switch (priv->type) { -- cgit v1.2.3 From 0a65e125150c227314dcd561a202a84228398449 Mon Sep 17 00:00:00 2001 From: Aishwarya Pant Date: Tue, 13 Feb 2018 13:48:16 +0530 Subject: libata: update documentation for sysfs interfaces Dcoumentation has been added by parsing through git commit history and reading code. This might be useful for scripting and tracking changes in the ABI. I do not have complete descriptions for the following 3 attributes; they have been annotated with the comment [to be documented] - /sys/class/scsi_host/hostX/ahci_port_cmd /sys/class/scsi_host/hostX/ahci_host_caps /sys/class/scsi_host/hostX/ahci_host_cap2 Signed-off-by: Aishwarya Pant Signed-off-by: Tejun Heo --- Documentation/ABI/testing/sysfs-block-device | 58 ++++++++++++++++ Documentation/ABI/testing/sysfs-class-scsi_host | 89 +++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-device diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device new file mode 100644 index 000000000000..82ef6eab042d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-device @@ -0,0 +1,58 @@ +What: /sys/block/*/device/sw_activity +Date: Jun, 2008 +KernelVersion: v2.6.27 +Contact: linux-ide@vger.kernel.org +Description: + (RW) Used by drivers which support software controlled activity + LEDs. + + It has the following valid values: + + 0 OFF - the LED is not activated on activity + 1 BLINK_ON - the LED blinks on every 10ms when activity is + detected. + 2 BLINK_OFF - the LED is on when idle, and blinks off + every 10ms when activity is detected. + + Note that the user must turn sw_activity OFF it they wish to + control the activity LED via the em_message file. + + +What: /sys/block/*/device/unload_heads +Date: Sep, 2008 +KernelVersion: v2.6.28 +Contact: linux-ide@vger.kernel.org +Description: + (RW) Hard disk shock protection + + Writing an integer value to this file will take the heads of the + respective drive off the platter and block all I/O operations + for the specified number of milliseconds. + + - If the device does not support the unload heads feature, + access is denied with -EOPNOTSUPP. + - The maximal value accepted for a timeout is 30000 + milliseconds. + - A previously set timeout can be cancelled and disk can resume + normal operation immediately by specifying a timeout of 0. + - Some hard drives only comply with an earlier version of the + ATA standard, but support the unload feature nonetheless. + There is no safe way Linux can detect these devices, so this + is not enabled by default. If it is known that your device + does support the unload feature, then you can tell the kernel + to enable it by writing -1. It can be disabled again by + writing -2. + - Values below -2 are rejected with -EINVAL + + For more information, see + Documentation/laptops/disk-shock-protection.txt + + +What: /sys/block/*/device/ncq_prio_enable +Date: Oct, 2016 +KernelVersion: v4.10 +Contact: linux-ide@vger.kernel.org +Description: + (RW) Write to the file to turn on or off the SATA ncq (native + command queueing) support. By default this feature is turned + off. diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host index 0eb255e7db12..bafc59fd7b69 100644 --- a/Documentation/ABI/testing/sysfs-class-scsi_host +++ b/Documentation/ABI/testing/sysfs-class-scsi_host @@ -27,3 +27,92 @@ Description: This file contains the current status of the "SSD Smart Path" the direct i/o path to physical devices. This setting is controller wide, affecting all configured logical drives on the controller. This file is readable and writable. + +What: /sys/class/scsi_host/hostX/link_power_management_policy +Date: Oct, 2007 +KernelVersion: v2.6.24 +Contact: linux-ide@vger.kernel.org +Description: + (RW) This parameter allows the user to read and set the link + (interface) power management. + + There are four possible options: + + min_power: Tell the controller to try to make the link use the + least possible power when possible. This may sacrifice some + performance due to increased latency when coming out of lower + power states. + + max_performance: Generally, this means no power management. + Tell the controller to have performance be a priority over power + management. + + medium_power: Tell the controller to enter a lower power state + when possible, but do not enter the lowest power state, thus + improving latency over min_power setting. + + med_power_with_dipm: Identical to the existing medium_power + setting except that it enables dipm (device initiated power + management) on top, which makes it match the Windows IRST (Intel + Rapid Storage Technology) driver settings. This setting is also + close to min_power, except that: + a) It does not use host-initiated slumber mode, but it does + allow device-initiated slumber + b) It does not enable low power device sleep mode (DevSlp). + +What: /sys/class/scsi_host/hostX/em_message +What: /sys/class/scsi_host/hostX/em_message_type +Date: Jun, 2008 +KernelVersion: v2.6.27 +Contact: linux-ide@vger.kernel.org +Description: + em_message: (RW) Enclosure management support. For the LED + protocol, writes and reads correspond to the LED message format + as defined in the AHCI spec. + + The user must turn sw_activity (under /sys/block/*/device/) OFF + it they wish to control the activity LED via the em_message + file. + + em_message_type: (RO) Displays the current enclosure management + protocol that is being used by the driver (for eg. LED, SAF-TE, + SES-2, SGPIO etc). + +What: /sys/class/scsi_host/hostX/ahci_port_cmd +What: /sys/class/scsi_host/hostX/ahci_host_caps +What: /sys/class/scsi_host/hostX/ahci_host_cap2 +Date: Mar, 2010 +KernelVersion: v2.6.35 +Contact: linux-ide@vger.kernel.org +Description: + [to be documented] + +What: /sys/class/scsi_host/hostX/ahci_host_version +Date: Mar, 2010 +KernelVersion: v2.6.35 +Contact: linux-ide@vger.kernel.org +Description: + (RO) Display the version of the AHCI spec implemented by the + host. + +What: /sys/class/scsi_host/hostX/em_buffer +Date: Apr, 2010 +KernelVersion: v2.6.35 +Contact: linux-ide@vger.kernel.org +Description: + (RW) Allows access to AHCI EM (enclosure management) buffer + directly if the host supports EM. + + For eg. the AHCI driver supports SGPIO EM messages but the + SATA/AHCI specs do not define the SGPIO message format of the EM + buffer. Different hardware(HW) vendors may have different + definitions. With the em_buffer attribute, this issue can be + solved by allowing HW vendors to provide userland drivers and + tools for their SGPIO initiators. + +What: /sys/class/scsi_host/hostX/em_message_supported +Date: Oct, 2009 +KernelVersion: v2.6.39 +Contact: linux-ide@vger.kernel.org +Description: + (RO) Displays supported enclosure management message types. -- cgit v1.2.3 From c5489f9fc053c744c609f34b32efca395cc2fdad Mon Sep 17 00:00:00 2001 From: Michal Oleszczyk Date: Fri, 2 Feb 2018 13:10:29 +0100 Subject: sgtl5000: change digital_mute policy Current implementation mute codec in global way (DAC block). That means when user routes sound not from I2S but from AUX source (LINE_IN) it also will be muted by alsa core. This should not happen. Signed-off-by: Michal Oleszczyk Signed-off-by: Mark Brown --- sound/soc/codecs/sgtl5000.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index e1ab5537d27a..c445a0794a27 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -529,10 +529,15 @@ static const struct snd_kcontrol_new sgtl5000_snd_controls[] = { static int sgtl5000_digital_mute(struct snd_soc_dai *codec_dai, int mute) { struct snd_soc_codec *codec = codec_dai->codec; - u16 adcdac_ctrl = SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT; + u16 i2s_pwr = SGTL5000_I2S_IN_POWERUP; - snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL, - adcdac_ctrl, mute ? adcdac_ctrl : 0); + /* + * During 'digital mute' do not mute DAC + * because LINE_IN would be muted aswell. We want to mute + * only I2S block - this can be done by powering it off + */ + snd_soc_update_bits(codec, SGTL5000_CHIP_DIG_POWER, + i2s_pwr, mute ? 0 : i2s_pwr); return 0; } @@ -1237,6 +1242,10 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) */ snd_soc_write(codec, SGTL5000_DAP_CTRL, 0); + /* Unmute DAC after start */ + snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL, + SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT, 0); + return 0; err: -- cgit v1.2.3 From dbe7d4c6d11999bda20bcea2572263150ff231ef Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 5 Feb 2018 18:05:00 +0100 Subject: ASoC: samsung: Add the DT binding files entry to MAINTAINERS This patch adds missing DT binding files to the Samsung ASoC drivers entry. Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260e36b7..2161c1df9de3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12091,6 +12091,7 @@ M: Sylwester Nawrocki L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/samsung/ +F: Documentation/devicetree/bindings/sound/samsung* SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER M: Krzysztof Kozlowski -- cgit v1.2.3 From b23af6ad8d2f708c4c3f92dd8f82c233247ba8bf Mon Sep 17 00:00:00 2001 From: Philipp Rossak Date: Wed, 14 Feb 2018 15:10:24 +0100 Subject: ARM: dts: sun6i: a31s: bpi-m2: improve pmic properties The eldoin is supplied from the dcdc1 regulator. The N_VBUSEN pin is connected to an external power regulator (SY6280AAC). With this commit we update the pmic binding properties to support those features. Fixes: 7daa21370075 ("ARM: dts: sunxi: Add regulators for Sinovoip BPI-M2") Cc: Signed-off-by: Philipp Rossak Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts index 51e6f1d21c32..fb34f32502cf 100644 --- a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts @@ -163,6 +163,8 @@ reg = <0x68>; interrupt-parent = <&nmi_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + eldoin-supply = <®_dcdc1>; + x-powers,drive-vbus-en; }; }; -- cgit v1.2.3 From 70b8d21496758dd7ff600ec9de0ee3812fac7a40 Mon Sep 17 00:00:00 2001 From: Philipp Rossak Date: Wed, 14 Feb 2018 15:10:25 +0100 Subject: ARM: dts: sun6i: a31s: bpi-m2: add missing regulators This patch fixes a bootproblem with the Bananapi M2 board. Since there are some regulators missing we add them right now. Those values come from the schematic, below you can find a small overview: * reg_aldo1: 3,3V, powers the wifi * reg_aldo2: 2,5V, powers the IO of the RTL8211E * reg_aldo3: 3,3V, powers the audio * reg_dldo1: 3,0V, powers the RTL8211E * reg_dldo2: 2,8V, powers the analog part of the csi * reg_dldo3: 3,3V, powers misc * reg_eldo1: 1,8V, powers the csi * reg_ldo_io1:1,8V, powers the gpio * reg_dc5ldo: needs to be always on This patch updates also the vmmc-supply properties on the mmc0 and mmc2 node to use the allready existent regulators. We can now remove the sunxi-common-regulators.dtsi include since we don't need it anymore. Fixes: 7daa21370075 ("ARM: dts: sunxi: Add regulators for Sinovoip BPI-M2") Cc: Signed-off-by: Philipp Rossak Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts | 61 ++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts index fb34f32502cf..b2758dd8ce43 100644 --- a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts @@ -42,7 +42,6 @@ /dts-v1/; #include "sun6i-a31s.dtsi" -#include "sunxi-common-regulators.dtsi" #include / { @@ -99,6 +98,7 @@ pinctrl-0 = <&gmac_pins_rgmii_a>, <&gmac_phy_reset_pin_bpi_m2>; phy = <&phy1>; phy-mode = "rgmii"; + phy-supply = <®_dldo1>; snps,reset-gpio = <&pio 0 21 GPIO_ACTIVE_HIGH>; /* PA21 */ snps,reset-active-low; snps,reset-delays-us = <0 10000 30000>; @@ -118,7 +118,7 @@ &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin_bpi_m2>; - vmmc-supply = <®_vcc3v0>; + vmmc-supply = <®_dcdc1>; bus-width = <4>; cd-gpios = <&pio 0 4 GPIO_ACTIVE_HIGH>; /* PA4 */ cd-inverted; @@ -132,7 +132,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins_a>; - vmmc-supply = <®_vcc3v0>; + vmmc-supply = <®_aldo1>; mmc-pwrseq = <&mmc2_pwrseq>; bus-width = <4>; non-removable; @@ -195,7 +195,28 @@ #include "axp22x.dtsi" +®_aldo1 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-wifi"; +}; + +®_aldo2 { + regulator-always-on; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-name = "vcc-gmac"; +}; + +®_aldo3 { + regulator-always-on; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; + regulator-name = "avcc"; +}; + ®_dc5ldo { + regulator-always-on; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1320000>; regulator-name = "vdd-cpus"; @@ -235,6 +256,40 @@ regulator-name = "vcc-dram"; }; +®_dldo1 { + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; + regulator-name = "vcc-mac"; +}; + +®_dldo2 { + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-name = "avdd-csi"; +}; + +®_dldo3 { + regulator-always-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-pb"; +}; + +®_eldo1 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-name = "vdd-csi"; + status = "okay"; +}; + +®_ldo_io1 { + regulator-always-on; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-name = "vcc-pm-cpus"; + status = "okay"; +}; + &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_pins_a>; -- cgit v1.2.3 From 0a815fc929e62f54a22a09e10e18f038e6d63c39 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 2 Feb 2018 13:35:10 +0100 Subject: MAINTAINERS: update email address for Maxime Ripard Free Electrons is now Bootlin, change my email address accordingly. Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260e36b7..20fbeede1357 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1152,7 +1152,7 @@ S: Maintained F: drivers/clk/sunxi/ ARM/Allwinner sunXi SoC support -M: Maxime Ripard +M: Maxime Ripard M: Chen-Yu Tsai L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -4624,7 +4624,7 @@ F: include/uapi/drm/drm* F: include/linux/vga* DRM DRIVERS FOR ALLWINNER A10 -M: Maxime Ripard +M: Maxime Ripard L: dri-devel@lists.freedesktop.org S: Supported F: drivers/gpu/drm/sun4i/ -- cgit v1.2.3 From 510c321b557121861601f9d259aadd65aa274f35 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Feb 2018 19:06:02 +0800 Subject: xfrm: reuse uncached_list to track xdsts In early time, when freeing a xdst, it would be inserted into dst_garbage.list first. Then if it's refcnt was still held somewhere, later it would be put into dst_busy_list in dst_gc_task(). When one dev was being unregistered, the dev of these dsts in dst_busy_list would be set with loopback_dev and put this dev. So that this dev's removal wouldn't get blocked, and avoid the kmsg warning: kernel:unregister_netdevice: waiting for veth0 to become \ free. Usage count = 2 However after Commit 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle"), the xdst will not be freed with dst gc, and this warning happens. To fix it, we need to find these xdsts that are still held by others when removing the dev, and free xdst's dev and set it with loopback_dev. But unfortunately after flow_cache for xfrm was deleted, no list tracks them anymore. So we need to save these xdsts somewhere to release the xdst's dev later. To make this easier, this patch is to reuse uncached_list to track xdsts, so that the dev refcnt can be released in the event NETDEV_UNREGISTER process of fib_netdev_notifier. Thanks to Florian, we could move forward this fix quickly. Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") Reported-by: Jianlin Shi Reported-by: Hangbin Liu Tested-by: Eyal Birger Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/ip6_route.h | 3 +++ include/net/route.h | 3 +++ net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/xfrm4_policy.c | 4 +++- net/ipv6/route.c | 4 ++-- net/ipv6/xfrm6_policy.c | 5 +++++ 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 27d23a65f3cd..ac0866bb9e93 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -179,6 +179,9 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_uncached_list_add(struct rt6_info *rt); +void rt6_uncached_list_del(struct rt6_info *rt); + static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/route.h b/include/net/route.h index 1eb9ce470e25..40b870d58f38 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -227,6 +227,9 @@ struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); +void rt_add_uncached_list(struct rtable *rt); +void rt_del_uncached_list(struct rtable *rt); + static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1ba..1d1e4abe04b0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1383,7 +1383,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); -static void rt_add_uncached_list(struct rtable *rt) +void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); @@ -1394,14 +1394,8 @@ static void rt_add_uncached_list(struct rtable *rt) spin_unlock_bh(&ul->lock); } -static void ipv4_dst_destroy(struct dst_entry *dst) +void rt_del_uncached_list(struct rtable *rt) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); - struct rtable *rt = (struct rtable *) dst; - - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1411,6 +1405,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) } } +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + struct rtable *rt = (struct rtable *)dst; + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + + rt_del_uncached_list(rt); +} + void rt_flush_dev(struct net_device *dev) { struct net *net = dev_net(dev); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05017e2c849c..8d33f7b311f4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -102,6 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); return 0; } @@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); - + if (xdst->u.rt.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb2d251c0500..38b75e9d6eae 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -128,7 +128,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); -static void rt6_uncached_list_add(struct rt6_info *rt) +void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); @@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt) spin_unlock_bh(&ul->lock); } -static void rt6_uncached_list_del(struct rt6_info *rt) +void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 09fb44ee3b45..416fe67271a9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } @@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); + if (xdst->u.rt6.rt6i_uncached_list) + rt6_uncached_list_del(&xdst->u.rt6); xfrm_dst_destroy(xdst); } -- cgit v1.2.3 From 5ab2ba931255d8bf03009c06d58dce97de32797c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 29 Mar 2016 10:56:57 +0300 Subject: iwlwifi: mvm: fix security bug in PN checking A previous patch allowed the same PN for packets originating from the same AMSDU by copying PN only for the last packet in the series. This however is bogus since we cannot assume the last frame will be received on the same queue, and if it is received on a different ueue we will end up not incrementing the PN and possibly let the next packet to have the same PN and pass through. Change the logic instead to driver explicitly indicate for the second sub frame and on to be allowed to have the same PN as the first subframe. Indicate it to mac80211 as well for the fallback queue. Fixes: f1ae02b186d9 ("iwlwifi: mvm: allow same PN for de-aggregated AMSDU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a3f7c1bf3cc8..580de5851fc7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -71,6 +71,7 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_rx_status *stats = IEEE80211_SKB_RXCB(skb); struct iwl_mvm_key_pn *ptk_pn; + int res; u8 tid, keyidx; u8 pn[IEEE80211_CCMP_PN_LEN]; u8 *extiv; @@ -127,12 +128,13 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, pn[4] = extiv[1]; pn[5] = extiv[0]; - if (memcmp(pn, ptk_pn->q[queue].pn[tid], - IEEE80211_CCMP_PN_LEN) <= 0) + res = memcmp(pn, ptk_pn->q[queue].pn[tid], IEEE80211_CCMP_PN_LEN); + if (res < 0) + return -1; + if (!res && !(stats->flag & RX_FLAG_ALLOW_SAME_PN)) return -1; - if (!(stats->flag & RX_FLAG_AMSDU_MORE)) - memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); + memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); stats->flag |= RX_FLAG_PN_VALIDATED; return 0; @@ -314,28 +316,21 @@ static void iwl_mvm_rx_csum(struct ieee80211_sta *sta, } /* - * returns true if a packet outside BA session is a duplicate and - * should be dropped + * returns true if a packet is a duplicate and should be dropped. + * Updates AMSDU PN tracking info */ -static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue, - struct ieee80211_rx_status *rx_status, - struct ieee80211_hdr *hdr, - struct iwl_rx_mpdu_desc *desc) +static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue, + struct ieee80211_rx_status *rx_status, + struct ieee80211_hdr *hdr, + struct iwl_rx_mpdu_desc *desc) { struct iwl_mvm_sta *mvm_sta; struct iwl_mvm_rxq_dup_data *dup_data; - u8 baid, tid, sub_frame_idx; + u8 tid, sub_frame_idx; if (WARN_ON(IS_ERR_OR_NULL(sta))) return false; - baid = (le32_to_cpu(desc->reorder_data) & - IWL_RX_MPDU_REORDER_BAID_MASK) >> - IWL_RX_MPDU_REORDER_BAID_SHIFT; - - if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) - return false; - mvm_sta = iwl_mvm_sta_from_mac80211(sta); dup_data = &mvm_sta->dup_data[queue]; @@ -365,6 +360,12 @@ static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue, dup_data->last_sub_frame[tid] >= sub_frame_idx)) return true; + /* Allow same PN as the first subframe for following sub frames */ + if (dup_data->last_seq[tid] == hdr->seq_ctrl && + sub_frame_idx > dup_data->last_sub_frame[tid] && + desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) + rx_status->flag |= RX_FLAG_ALLOW_SAME_PN; + dup_data->last_seq[tid] = hdr->seq_ctrl; dup_data->last_sub_frame[tid] = sub_frame_idx; @@ -971,7 +972,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (ieee80211_is_data(hdr->frame_control)) iwl_mvm_rx_csum(sta, skb, desc); - if (iwl_mvm_is_nonagg_dup(sta, queue, rx_status, hdr, desc)) { + if (iwl_mvm_is_dup(sta, queue, rx_status, hdr, desc)) { kfree_skb(skb); goto out; } -- cgit v1.2.3 From fc07bd8ce19bff9e7479c04077ddb5957d1a27be Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 21 Dec 2017 15:05:28 +0200 Subject: iwlwifi: mvm: fix IBSS for devices that support station type API In IBSS, the mac80211 sets the cab_queue to be invalid. However, the multicast station uses it, so we need to override it. A previous patch did it, but it was nested inside the if's and was applied only for legacy FWs that don't support the new station type API, instead of being applied for all paths. In addition, add a missing NL80211_IFTYPE_ADHOC to the initialization of the queues in iwl_mvm_mac_ctxt_init() Fixes: ee48b72211f8 ("iwlwifi: mvm: support ibss in dqa mode") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 24 +++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 2f22e14e00fe..8ba16fc24e3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -438,7 +438,8 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) } /* Allocate the CAB queue for softAP and GO interfaces */ - if (vif->type == NL80211_IFTYPE_AP) { + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) { /* * For TVQM this will be overwritten later with the FW assigned * queue value (when queue is enabled). diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 6b2674e02606..dbd2ba2bb714 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2052,6 +2052,17 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) vif->type != NL80211_IFTYPE_ADHOC)) return -ENOTSUPP; + /* + * In IBSS, ieee80211_check_queues() sets the cab_queue to be + * invalid, so make sure we use the queue we want. + * Note that this is done here as we want to avoid making DQA + * changes in mac80211 layer. + */ + if (vif->type == NL80211_IFTYPE_ADHOC) { + vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; + mvmvif->cab_queue = vif->cab_queue; + } + /* * While in previous FWs we had to exclude cab queue from TFD queue * mask, now it is needed as any other queue. @@ -2083,20 +2094,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) timeout); mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_STA_TYPE)) { - /* - * In IBSS, ieee80211_check_queues() sets the cab_queue to be - * invalid, so make sure we use the queue we want. - * Note that this is done here as we want to avoid making DQA - * changes in mac80211 layer. - */ - if (vif->type == NL80211_IFTYPE_ADHOC) { - vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; - mvmvif->cab_queue = vif->cab_queue; - } + IWL_UCODE_TLV_API_STA_TYPE)) iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0, &cfg, timeout); - } return 0; } -- cgit v1.2.3 From 4437ba7ee7de7e71d11deb91c87a370e4ffd2601 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 27 Dec 2017 08:58:02 +0200 Subject: iwlwifi: pcie: don't warn if we use all the transmit pointers Our Transmit Frame Descriptor (TFD) is a DMA descriptor that includes several pointers to be able to transmit a packet which is not physically contiguous. Depending on the hardware being use, we can have 20 or 25 pointers in a single TFD. In both cases, it is more than enough and it is quite hard to hit this limit. It has been reported that when using specific applications (Ktorrent), we can actually use all the pointers and then a long standing bug showed up. When we free the TFD, we check its number of valid pointers and make sure it doesn't exceed the number of pointers the hardware support. This check had an off by one bug: it is perfectly valid to free the 20 pointers if the TFD has 20 pointers. Fix that. https://bugzilla.kernel.org/show_bug.cgi?id=197981 Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 6d0a907d5ba5..fabae0f60683 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -147,7 +147,7 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans, /* Sanity check on number of chunks */ num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd); - if (num_tbs >= trans_pcie->max_tbs) { + if (num_tbs > trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); return; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 3f85713c41dc..1a566287993d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -378,7 +378,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, /* Sanity check on number of chunks */ num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - if (num_tbs >= trans_pcie->max_tbs) { + if (num_tbs > trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); /* @todo issue fatal error, it is quite serious situation */ return; -- cgit v1.2.3 From 4b7f7ee2a5f523cf002328c6c02cb3e28a0fb30b Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 28 Dec 2017 13:28:30 +0200 Subject: iwlwifi: align timestamp cancel with timestamp start Canceling the periodic timestamp work should be done in the opposite flow to where it was started. This also prevents from sending the MARKER command during the mac_stop flow - causing a false queue hang (FW is no longer there to send a response). Fixes: 93b167c13a3a ("iwlwifi: runtime: sync FW and host clocks for logs") Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/init.c | 6 ------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index c39fe84bb4c4..45f21acbd842 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,9 +76,3 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); - -void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt) -{ - iwl_fw_cancel_timestamp(fwrt); -} -IWL_EXPORT_SYMBOL(iwl_fw_runtime_exit); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2d28e0804218..89ff02d7c876 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -90,6 +90,7 @@ #include "fw/runtime.h" #include "fw/dbg.h" #include "fw/acpi.h" +#include "fw/debugfs.h" #define IWL_MVM_MAX_ADDRESSES 5 /* RSSI offset for WkP */ @@ -1783,6 +1784,7 @@ static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm) static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm) { + iwl_fw_cancel_timestamp(&mvm->fwrt); iwl_free_fw_paging(&mvm->fwrt); clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status); iwl_fw_dump_conf_clear(&mvm->fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 5d525a0023dc..6bb347bf0d6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -802,7 +802,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, iwl_mvm_leds_exit(mvm); iwl_mvm_thermal_exit(mvm); out_free: - iwl_fw_runtime_exit(&mvm->fwrt); iwl_fw_flush_dump(&mvm->fwrt); if (iwlmvm_mod_params.init_dbg) @@ -843,7 +842,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_IWLWIFI_DEBUGFS) kfree(mvm->d3_resume_sram); #endif - iwl_fw_runtime_exit(&mvm->fwrt); iwl_trans_op_mode_leave(mvm->trans); iwl_phy_db_free(mvm->phy_db); -- cgit v1.2.3 From 6b7a5aea71b342ec0593d23b08383e1f33da4c9a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Thu, 28 Dec 2017 15:53:04 +0200 Subject: iwlwifi: mvm: always init rs with 20mhz bandwidth rates In AP mode, when a new station associates, rs is initialized immediately upon association completion, before the phy context is updated with the association parameters, so the sta bandwidth might be wider than the phy context allows. To avoid this issue, always initialize rs with 20mhz bandwidth rate, and after authorization, when the phy context is already up-to-date, re-init rs with the correct bw. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 28 ++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 8aed40a8bc38..08de822c3ef0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2682,6 +2682,10 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, /* enable beacon filtering */ WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0)); + + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, + false); + ret = 0; } else if (old_state == IEEE80211_STA_AUTHORIZED && new_state == IEEE80211_STA_ASSOC) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 60abb0084ee5..47f4c7a1d80d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2684,7 +2684,8 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_lq_sta *lq_sta, enum nl80211_band band, - struct rs_rate *rate) + struct rs_rate *rate, + bool init) { int i, nentries; unsigned long active_rate; @@ -2738,14 +2739,25 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, */ if (sta->vht_cap.vht_supported && best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) { - switch (sta->bandwidth) { - case IEEE80211_STA_RX_BW_160: - case IEEE80211_STA_RX_BW_80: - case IEEE80211_STA_RX_BW_40: + /* + * In AP mode, when a new station associates, rs is initialized + * immediately upon association completion, before the phy + * context is updated with the association parameters, so the + * sta bandwidth might be wider than the phy context allows. + * To avoid this issue, always initialize rs with 20mhz + * bandwidth rate, and after authorization, when the phy context + * is already up-to-date, re-init rs with the correct bw. + */ + u32 bw = init ? RATE_MCS_CHAN_WIDTH_20 : rs_bw_from_sta_bw(sta); + + switch (bw) { + case RATE_MCS_CHAN_WIDTH_40: + case RATE_MCS_CHAN_WIDTH_80: + case RATE_MCS_CHAN_WIDTH_160: initial_rates = rs_optimal_rates_vht; nentries = ARRAY_SIZE(rs_optimal_rates_vht); break; - case IEEE80211_STA_RX_BW_20: + case RATE_MCS_CHAN_WIDTH_20: initial_rates = rs_optimal_rates_vht_20mhz; nentries = ARRAY_SIZE(rs_optimal_rates_vht_20mhz); break; @@ -2756,7 +2768,7 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, active_rate = lq_sta->active_siso_rate; rate->type = LQ_VHT_SISO; - rate->bw = rs_bw_from_sta_bw(sta); + rate->bw = bw; } else if (sta->ht_cap.ht_supported && best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) { initial_rates = rs_optimal_rates_ht; @@ -2839,7 +2851,7 @@ static void rs_initialize_lq(struct iwl_mvm *mvm, tbl = &(lq_sta->lq_info[active_tbl]); rate = &tbl->rate; - rs_get_initial_rate(mvm, sta, lq_sta, band, rate); + rs_get_initial_rate(mvm, sta, lq_sta, band, rate, init); rs_init_optimal_rate(mvm, sta, lq_sta); WARN_ONCE(rate->ant != ANT_A && rate->ant != ANT_B, -- cgit v1.2.3 From aba62a9e9a4064c5ea9deb33b5b1392f263cad24 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 16 Feb 2018 12:45:13 -0200 Subject: MAINTAINERS: Add myself as sgtl5000 maintainer I would like helping maintaining and reviewing/testing sgtl5000 related patches. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260e36b7..4e283d131def 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9921,6 +9921,13 @@ F: Documentation/ABI/stable/sysfs-bus-nvmem F: include/linux/nvmem-consumer.h F: include/linux/nvmem-provider.h +NXP SGTL5000 DRIVER +M: Fabio Estevam +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/sound/sgtl5000.txt +F: sound/soc/codecs/sgtl5000* + NXP TDA998X DRM DRIVER M: Russell King S: Supported -- cgit v1.2.3 From a8992973edbb2555e956b90f6fe97c4bc14d761d Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 16 Feb 2018 11:58:54 -0200 Subject: ASoC: sgtl5000: Fix suspend/resume Commit 8419caa72702 ("ASoC: sgtl5000: Do not disable regulators in SND_SOC_BIAS_OFF") causes the sgtl5000 to fail after a suspend/resume sequence: Playing WAVE '/media/a2002011001-e02.wav' : Signed 16 bit Little Endian, Rate 44100 Hz, Stereo aplay: pcm_write:2051: write error: Input/output error The problem is caused by the fact that the aforementioned commit dropped the cache handling, so re-introduce the register map resync to fix the problem. Suggested-by: Mark Brown Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Cc: --- sound/soc/codecs/sgtl5000.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index c445a0794a27..c5c76ab8ccf1 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -876,15 +876,26 @@ static int sgtl5000_pcm_hw_params(struct snd_pcm_substream *substream, static int sgtl5000_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { + struct sgtl5000_priv *sgtl = snd_soc_codec_get_drvdata(codec); + int ret; + switch (level) { case SND_SOC_BIAS_ON: case SND_SOC_BIAS_PREPARE: case SND_SOC_BIAS_STANDBY: + regcache_cache_only(sgtl->regmap, false); + ret = regcache_sync(sgtl->regmap); + if (ret) { + regcache_cache_only(sgtl->regmap, true); + return ret; + } + snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, SGTL5000_REFTOP_POWERUP, SGTL5000_REFTOP_POWERUP); break; case SND_SOC_BIAS_OFF: + regcache_cache_only(sgtl->regmap, true); snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, SGTL5000_REFTOP_POWERUP, 0); break; -- cgit v1.2.3 From 50c330973c0c9f1e300b07bbab78d306dcc6e612 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 16 Feb 2018 16:57:56 +0000 Subject: irqchip/gic-v3-its: Fix misplaced __iomem annotations Save 26 lines worth of Sparse complaints by fixing up this minor mishap. The pointee lies in the __iomem space; the pointer does not. Signed-off-by: Robin Murphy Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1d3056f53747..94b7d74d519f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2495,7 +2495,7 @@ static int its_vpe_set_affinity(struct irq_data *d, static void its_vpe_schedule(struct its_vpe *vpe) { - void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); u64 val; /* Schedule the VPE */ @@ -2527,7 +2527,7 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { - void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); u32 count = 1000000; /* 1s! */ bool clean; u64 val; -- cgit v1.2.3 From 9c7be59fc519af9081c46c48f06f2b8fadf55ad8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2018 10:48:20 +0100 Subject: libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs Various people have reported the Crucial MX100 512GB model not working with LPM set to min_power. I've now received a report that it also does not work with the new med_power_with_dipm level. It does work with medium_power, but that has no measurable power-savings and given the amount of people being bitten by the other levels not working, this commit just disables LPM altogether. Note all reporters of this have either the 512GB model (max capacity), or are not specifying their SSD's size. So for now this quirk assumes this is a problem with the 512GB model only. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=89261 Buglink: https://github.com/linrunner/TLP/issues/84 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 61b09968d032..28cad49fc846 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4530,6 +4530,11 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ + { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, -- cgit v1.2.3 From 15d9f3d116c02a485441d758d9ca0a2e4f3b30be Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 15 Feb 2018 10:08:14 -0600 Subject: percpu: match chunk allocator declarations with definitions At some point the function declaration parameters got out of sync with the function definitions in percpu-vm.c and percpu-km.c. This patch makes them match again. Signed-off-by: Dennis Zhou Acked-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/percpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 50e7fdf84055..e1ea41002173 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1277,8 +1277,10 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, * pcpu_addr_to_page - translate address to physical address * pcpu_verify_alloc_info - check alloc_info is acceptable during init */ -static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size); -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size); +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, + int page_start, int page_end); +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, + int page_start, int page_end); static struct pcpu_chunk *pcpu_create_chunk(void); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); -- cgit v1.2.3 From 47504ee04b9241548ae2c28be7d0b01cff3b7aa6 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Fri, 16 Feb 2018 12:07:19 -0600 Subject: percpu: add __GFP_NORETRY semantics to the percpu balancing path Percpu memory using the vmalloc area based chunk allocator lazily populates chunks by first requesting the full virtual address space required for the chunk and subsequently adding pages as allocations come through. To ensure atomic allocations can succeed, a workqueue item is used to maintain a minimum number of empty pages. In certain scenarios, such as reported in [1], it is possible that physical memory becomes quite scarce which can result in either a rather long time spent trying to find free pages or worse, a kernel panic. This patch adds support for __GFP_NORETRY and __GFP_NOWARN passing them through to the underlying allocators. This should prevent any unnecessary panics potentially caused by the workqueue item. The passing of gfp around is as additional flags rather than a full set of flags. The next patch will change these to caller passed semantics. V2: Added const modifier to gfp flags in the balance path. Removed an extra whitespace. [1] https://lkml.org/lkml/2018/2/12/551 Signed-off-by: Dennis Zhou Suggested-by: Daniel Borkmann Reported-by: syzbot+adb03f3f0bb57ce3acda@syzkaller.appspotmail.com Acked-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/percpu-km.c | 8 ++++---- mm/percpu-vm.c | 18 +++++++++++------- mm/percpu.c | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/mm/percpu-km.c b/mm/percpu-km.c index d2a76642c4ae..0d88d7bd5706 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -34,7 +34,7 @@ #include static int pcpu_populate_chunk(struct pcpu_chunk *chunk, - int page_start, int page_end) + int page_start, int page_end, gfp_t gfp) { return 0; } @@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, /* nada */ } -static struct pcpu_chunk *pcpu_create_chunk(void) +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; struct pcpu_chunk *chunk; struct page *pages; int i; - chunk = pcpu_alloc_chunk(); + chunk = pcpu_alloc_chunk(gfp); if (!chunk) return NULL; - pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages)); + pages = alloc_pages(gfp | GFP_KERNEL, order_base_2(nr_pages)); if (!pages) { pcpu_free_chunk(chunk); return NULL; diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 9158e5a81391..0af71eb2fff0 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void) lockdep_assert_held(&pcpu_alloc_mutex); if (!pages) - pages = pcpu_mem_zalloc(pages_size); + pages = pcpu_mem_zalloc(pages_size, 0); return pages; } @@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk, * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() * @page_start: page index of the first page to be allocated * @page_end: page index of the last page to be allocated + 1 + * @gfp: allocation flags passed to the underlying allocator * * Allocate pages [@page_start,@page_end) into @pages for all units. * The allocation is for @chunk. Percpu core doesn't care about the * content of @pages and will pass it verbatim to pcpu_map_pages(). */ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, - struct page **pages, int page_start, int page_end) + struct page **pages, int page_start, int page_end, + gfp_t gfp) { - const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM; unsigned int cpu, tcpu; int i; + gfp |= GFP_KERNEL | __GFP_HIGHMEM; + for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; @@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, * @chunk: chunk of interest * @page_start: the start page * @page_end: the end page + * @gfp: allocation flags passed to the underlying memory allocator * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. @@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, * pcpu_alloc_mutex, does GFP_KERNEL allocation. */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, - int page_start, int page_end) + int page_start, int page_end, gfp_t gfp) { struct page **pages; @@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, if (!pages) return -ENOMEM; - if (pcpu_alloc_pages(chunk, pages, page_start, page_end)) + if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp)) return -ENOMEM; if (pcpu_map_pages(chunk, pages, page_start, page_end)) { @@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, pcpu_free_pages(chunk, pages, page_start, page_end); } -static struct pcpu_chunk *pcpu_create_chunk(void) +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; struct vm_struct **vms; - chunk = pcpu_alloc_chunk(); + chunk = pcpu_alloc_chunk(gfp); if (!chunk) return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index e1ea41002173..f97443d488a8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -447,10 +447,12 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, /** * pcpu_mem_zalloc - allocate memory * @size: bytes to allocate + * @gfp: allocation flags * * Allocate @size bytes. If @size is smaller than PAGE_SIZE, - * kzalloc() is used; otherwise, vzalloc() is used. The returned - * memory is always zeroed. + * kzalloc() is used; otherwise, the equivalent of vzalloc() is used. + * This is to facilitate passing through whitelisted flags. The + * returned memory is always zeroed. * * CONTEXT: * Does GFP_KERNEL allocation. @@ -458,15 +460,16 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, * RETURNS: * Pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_mem_zalloc(size_t size) +static void *pcpu_mem_zalloc(size_t size, gfp_t gfp) { if (WARN_ON_ONCE(!slab_is_available())) return NULL; if (size <= PAGE_SIZE) - return kzalloc(size, GFP_KERNEL); + return kzalloc(size, gfp | GFP_KERNEL); else - return vzalloc(size); + return __vmalloc(size, gfp | GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL); } /** @@ -1154,12 +1157,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, return chunk; } -static struct pcpu_chunk *pcpu_alloc_chunk(void) +static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; int region_bits; - chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size); + chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp); if (!chunk) return NULL; @@ -1168,17 +1171,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) region_bits = pcpu_chunk_map_bits(chunk); chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) * - sizeof(chunk->alloc_map[0])); + sizeof(chunk->alloc_map[0]), gfp); if (!chunk->alloc_map) goto alloc_map_fail; chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) * - sizeof(chunk->bound_map[0])); + sizeof(chunk->bound_map[0]), gfp); if (!chunk->bound_map) goto bound_map_fail; chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) * - sizeof(chunk->md_blocks[0])); + sizeof(chunk->md_blocks[0]), gfp); if (!chunk->md_blocks) goto md_blocks_fail; @@ -1278,10 +1281,10 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, * pcpu_verify_alloc_info - check alloc_info is acceptable during init */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, - int page_start, int page_end); + int page_start, int page_end, gfp_t gfp); static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end); -static struct pcpu_chunk *pcpu_create_chunk(void); +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); @@ -1423,7 +1426,7 @@ restart: } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { - chunk = pcpu_create_chunk(); + chunk = pcpu_create_chunk(0); if (!chunk) { err = "failed to allocate new chunk"; goto fail; @@ -1452,7 +1455,7 @@ area_found: page_start, page_end) { WARN_ON(chunk->immutable); - ret = pcpu_populate_chunk(chunk, rs, re); + ret = pcpu_populate_chunk(chunk, rs, re, 0); spin_lock_irqsave(&pcpu_lock, flags); if (ret) { @@ -1563,10 +1566,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * - * Reclaim all fully free chunks except for the first one. + * Reclaim all fully free chunks except for the first one. This is also + * responsible for maintaining the pool of empty populated pages. However, + * it is possible that this is called when physical memory is scarce causing + * OOM killer to be triggered. We should avoid doing so until an actual + * allocation causes the failure as it is possible that requests can be + * serviced from already backed regions. */ static void pcpu_balance_workfn(struct work_struct *work) { + /* gfp flags passed to underlying allocators */ + const gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; @@ -1647,7 +1657,7 @@ retry_pop: chunk->nr_pages) { int nr = min(re - rs, nr_to_pop); - ret = pcpu_populate_chunk(chunk, rs, rs + nr); + ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp); if (!ret) { nr_to_pop -= nr; spin_lock_irq(&pcpu_lock); @@ -1664,7 +1674,7 @@ retry_pop: if (nr_to_pop) { /* ran out of chunks to populate, create a new one and retry */ - chunk = pcpu_create_chunk(); + chunk = pcpu_create_chunk(gfp); if (chunk) { spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); -- cgit v1.2.3 From 554fef1c39ee148623a496e04569dabb11463406 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Fri, 16 Feb 2018 12:09:58 -0600 Subject: percpu: allow select gfp to be passed to underlying allocators The prior patch added support for passing gfp flags through to the underlying allocators. This patch allows users to pass along gfp flags (currently only __GFP_NORETRY and __GFP_NOWARN) to the underlying allocators. This should allow users to decide if they are ok with failing allocations recovering in a more graceful way. Additionally, gfp passing was done as additional flags in the previous patch. Instead, change this to caller passed semantics. GFP_KERNEL is also removed as the default flag. It continues to be used for internally caused underlying percpu allocations. V2: Removed gfp_percpu_mask in favor of doing it inline. Removed GFP_KERNEL as a default flag for __alloc_percpu_gfp. Signed-off-by: Dennis Zhou Suggested-by: Daniel Borkmann Acked-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/percpu-km.c | 2 +- mm/percpu-vm.c | 4 ++-- mm/percpu.c | 16 +++++++--------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 0d88d7bd5706..38de70ab1a0d 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -56,7 +56,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) if (!chunk) return NULL; - pages = alloc_pages(gfp | GFP_KERNEL, order_base_2(nr_pages)); + pages = alloc_pages(gfp, order_base_2(nr_pages)); if (!pages) { pcpu_free_chunk(chunk); return NULL; diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 0af71eb2fff0..d8078de912de 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void) lockdep_assert_held(&pcpu_alloc_mutex); if (!pages) - pages = pcpu_mem_zalloc(pages_size, 0); + pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL); return pages; } @@ -86,7 +86,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, unsigned int cpu, tcpu; int i; - gfp |= GFP_KERNEL | __GFP_HIGHMEM; + gfp |= __GFP_HIGHMEM; for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { diff --git a/mm/percpu.c b/mm/percpu.c index f97443d488a8..fa3f854634a1 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -454,9 +454,6 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, * This is to facilitate passing through whitelisted flags. The * returned memory is always zeroed. * - * CONTEXT: - * Does GFP_KERNEL allocation. - * * RETURNS: * Pointer to the allocated area on success, NULL on failure. */ @@ -466,10 +463,9 @@ static void *pcpu_mem_zalloc(size_t size, gfp_t gfp) return NULL; if (size <= PAGE_SIZE) - return kzalloc(size, gfp | GFP_KERNEL); + return kzalloc(size, gfp); else - return __vmalloc(size, gfp | GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL); + return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL); } /** @@ -1344,6 +1340,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { + /* whitelisted flags that can be passed to the backing allocators */ + gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; bool do_warn = !(gfp & __GFP_NOWARN); static int warn_limit = 10; @@ -1426,7 +1424,7 @@ restart: } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { - chunk = pcpu_create_chunk(0); + chunk = pcpu_create_chunk(pcpu_gfp); if (!chunk) { err = "failed to allocate new chunk"; goto fail; @@ -1455,7 +1453,7 @@ area_found: page_start, page_end) { WARN_ON(chunk->immutable); - ret = pcpu_populate_chunk(chunk, rs, re, 0); + ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); spin_lock_irqsave(&pcpu_lock, flags); if (ret) { @@ -1576,7 +1574,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) static void pcpu_balance_workfn(struct work_struct *work) { /* gfp flags passed to underlying allocators */ - const gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; + const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; -- cgit v1.2.3 From 143a4454daaf0e80a2b9f37159a0d6d2b61e64ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 17 Feb 2018 15:16:22 +0800 Subject: xfrm: do not call rcu_read_unlock when afinfo is NULL in xfrm_get_tos When xfrm_policy_get_afinfo returns NULL, it will not hold rcu read lock. In this case, rcu_read_unlock should not be called in xfrm_get_tos, just like other places where it's calling xfrm_policy_get_afinfo. Fixes: f5e2bb4f5b22 ("xfrm: policy: xfrm_get_tos cannot fail") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3811ff002d..150d46633ce6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { const struct xfrm_policy_afinfo *afinfo; - int tos = 0; + int tos; afinfo = xfrm_policy_get_afinfo(family); - tos = afinfo ? afinfo->get_tos(fl) : 0; + if (!afinfo) + return 0; + + tos = afinfo->get_tos(fl); rcu_read_unlock(); -- cgit v1.2.3 From 5682e268350f9eccdbb04006605c1b7068a7b323 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 17 Feb 2018 21:05:04 +0800 Subject: clk: sunxi-ng: a31: Fix CLK_OUT_* clock ops When support for the A31/A31s CCU was first added, the clock ops for the CLK_OUT_* clocks was set to the wrong type. The clocks are MP-type, but the ops was set for div (M) clocks. This went unnoticed until now. This was because while they are different clocks, their data structures aligned in a way that ccu_div_ops would access the second ccu_div_internal and ccu_mux_internal structures, which were valid, if not incorrect. Furthermore, the use of these CLK_OUT_* was for feeding a precise 32.768 kHz clock signal to the WiFi chip. This was achievable by using the parent with the same clock rate and no divider. So the incorrect divider setting did not affect this usage. Commit 946797aa3f08 ("clk: sunxi-ng: Support fixed post-dividers on MP style clocks") added a new field to the ccu_mp structure, which broke the aforementioned alignment. Now the system crashes as div_ops tries to look up a nonexistent table. Reported-by: Philipp Rossak Tested-by: Philipp Rossak Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Cc: Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 72b16ed1012b..3b97f60540ad 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -762,7 +762,7 @@ static struct ccu_mp out_a_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-a", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; @@ -783,7 +783,7 @@ static struct ccu_mp out_b_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-b", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; @@ -804,7 +804,7 @@ static struct ccu_mp out_c_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-c", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; -- cgit v1.2.3 From 2d30e9494f1ea320aaaad0cff9ddd92c87eac355 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 18 Feb 2018 23:01:44 +0100 Subject: ASoC: rt5651: Fix regcache sync errors on resume The ALC5651 does not like multi-write accesses, avoid them. This fixes: rt5651 i2c-10EC5651:00: Unable to sync registers 0x27-0x28. -121 Errors on resume (and all registers after the registers in the error not being synced). Signed-off-by: Hans de Goede Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5651.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index 831b297978a4..45a73049cf64 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -1722,6 +1722,7 @@ static const struct regmap_config rt5651_regmap = { .num_reg_defaults = ARRAY_SIZE(rt5651_reg), .ranges = rt5651_ranges, .num_ranges = ARRAY_SIZE(rt5651_ranges), + .use_single_rw = true, }; #if defined(CONFIG_OF) -- cgit v1.2.3 From 013cb81e89f8a70deef086ca29a923faf5585ab0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Feb 2018 07:44:07 +0100 Subject: xfrm: Fix infinite loop in xfrm_get_dst_nexthop with transport mode. On transport mode we forget to fetch the child dst_entry before we continue the while loop, this leads to an infinite loop. Fix this by fetching the child dst_entry before we continue the while loop. Fixes: 0f6c480f23f4 ("xfrm: Move dst->path into struct xfrm_dst") Reported-by: syzbot+7d03c810e50aaedef98a@syzkaller.appspotmail.com Tested-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 150d46633ce6..625b3fca5704 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2732,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; + dst = xfrm_dst_child(dst); + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) continue; if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; - - dst = xfrm_dst_child(dst); } return daddr; } -- cgit v1.2.3 From 5e558f8afaec8957932b1dbe5aeff800f9fc6957 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 20 Feb 2018 16:19:05 +0200 Subject: ASoC: hdmi-codec: Fix module unloading caused kernel crash The hcp->chmap_info must not be freed up in the hdmi_codec_remove() function as it leads to kernel crash due ALSA core's pcm_chmap_ctl_private_free() is trying to free it up again when the card destroyed via snd_card_free. Commit cd6111b26280a ("ASoC: hdmi-codec: add channel mapping control") should not have added the kfree(hcp->chmap_info); to the hdmi_codec_remove function. Signed-off-by: Peter Ujfalusi Reviewed-by: Jyri Sarha Tested-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 5672e516bec3..c1830ccd3bb8 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -798,12 +798,7 @@ static int hdmi_codec_probe(struct platform_device *pdev) static int hdmi_codec_remove(struct platform_device *pdev) { - struct device *dev = &pdev->dev; - struct hdmi_codec_priv *hcp; - - hcp = dev_get_drvdata(dev); - kfree(hcp->chmap_info); - snd_soc_unregister_codec(dev); + snd_soc_unregister_codec(&pdev->dev); return 0; } -- cgit v1.2.3 From b17e5729a630d8326a48ec34ef02e6b4464a6aef Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 18 Feb 2018 22:17:09 +0800 Subject: libata: disable LPM for Crucial BX100 SSD 500GB drive After Laptop Mode Tools starts to use min_power for LPM, a user found out Crucial BX100 SSD can't get mounted. Crucial BX100 SSD 500GB drive don't work well with min_power. This also happens to med_power_with_dipm. So let's disable LPM for Crucial BX100 SSD 500GB drive. BugLink: https://bugs.launchpad.net/bugs/1726930 Signed-off-by: Kai-Heng Feng Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 28cad49fc846..cb789f8849ae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4530,6 +4530,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + /* Crucial BX100 SSD 500GB has broken LPM support */ + { "CT500BX100SSD1", "MU02", ATA_HORKAGE_NOLPM }, + /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | -- cgit v1.2.3 From d1897c9538edafd4ae6bbd03cc075962ddde2c21 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Feb 2018 11:39:22 -0800 Subject: cgroup: fix rule checking for threaded mode switching A domain cgroup isn't allowed to be turned threaded if its subtree is populated or domain controllers are enabled. cgroup_enable_threaded() depended on cgroup_can_be_thread_root() test to enforce this rule. A parent which has populated domain descendants or have domain controllers enabled can't become a thread root, so the above rules are enforced automatically. However, for the root cgroup which can host mixed domain and threaded children, cgroup_can_be_thread_root() doesn't check any of those conditions and thus first level cgroups ends up escaping those rules. This patch fixes the bug by adding explicit checks for those rules in cgroup_enable_threaded(). Reported-by: Michael Kerrisk (man-pages) Signed-off-by: Tejun Heo Fixes: 8cfd8147df67 ("cgroup: implement cgroup v2 thread support") Cc: stable@vger.kernel.org # v4.14+ --- kernel/cgroup/cgroup.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8cda3bc3ae22..4bfb2908ec15 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3183,6 +3183,16 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) if (cgroup_is_threaded(cgrp)) return 0; + /* + * If @cgroup is populated or has domain controllers enabled, it + * can't be switched. While the below cgroup_can_be_thread_root() + * test can catch the same conditions, that's only when @parent is + * not mixable, so let's check it explicitly. + */ + if (cgroup_is_populated(cgrp) || + cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask) + return -EOPNOTSUPP; + /* we're joining the parent's domain, ensure its validity */ if (!cgroup_is_valid_domain(dom_cgrp) || !cgroup_can_be_thread_root(dom_cgrp)) -- cgit v1.2.3 From a275b315334dea3c2151094765387ede421aac92 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 15 Jan 2018 13:21:48 -0200 Subject: clk: imx51-imx53: Fix UART4/5 registration on i.MX50 and i.MX53 Since commit 59dc3d8c8673 ("clk: imx51: uart4, uart5 gates only exist on imx50, imx53") the following warnings are seen on i.MX53: [ 2.776190] ------------[ cut here ]------------ [ 2.780948] WARNING: CPU: 0 PID: 1 at ../drivers/clk/clk.c:811 clk_core_disable+0xc4/0xe0 [ 2.789145] Modules linked in: [ 2.792236] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc7-next-20180115 #1 [ 2.799735] Hardware name: Freescale i.MX53 (Device Tree Support) [ 2.805845] Backtrace: [ 2.808329] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 2.815919] r7:00000000 r6:60000093 r5:00000000 r4:c10798d4 [ 2.821607] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 2.828854] [] (dump_stack) from [] (__warn+0xf0/0x11c) [ 2.835837] r9:00000000 r8:0000032b r7:00000009 r6:c0d429f8 r5:00000000 r4:00000000 [ 2.843601] [] (__warn) from [] (warn_slowpath_null+0x44/0x50) [ 2.851191] r8:c1008908 r7:c0e08874 r6:c04bfac8 r5:0000032b r4:c0d429f8 [ 2.857913] [] (warn_slowpath_null) from [] (clk_core_disable+0xc4/0xe0) [ 2.866369] r6:dc02bb00 r5:dc02a980 r4:dc02a980 [ 2.871011] [] (clk_core_disable) from [] (clk_core_disable_lock+0x20/0x2c) [ 2.879726] r5:dc02a980 r4:80000013 [ 2.883323] [] (clk_core_disable_lock) from [] (clk_disable+0x24/0x28) [ 2.891604] r5:c0f6b3e4 r4:0000001c [ 2.895209] [] (clk_disable) from [] (imx_clk_disable_uart+0x50/0x68) [ 2.903412] [] (imx_clk_disable_uart) from [] (do_one_initcall+0x50/0x19c) [ 2.912043] r7:c0e08874 r6:c0f63854 r5:c0f233bc r4:ffffe000 [ 2.917726] [] (do_one_initcall) from [] (kernel_init_freeable+0x118/0x1d0) [ 2.926447] r9:c0f63858 r8:000000f0 r7:c0e08874 r6:c0f63854 r5:c107b500 r4:c0f75260 [ 2.934220] [] (kernel_init_freeable) from [] (kernel_init+0x10/0x118) [ 2.942506] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0a4a5e0 [ 2.950351] r4:00000000 [ 2.952908] [] (kernel_init) from [] (ret_from_fork+0x14/0x20) [ 2.960496] Exception stack(0xdc05dfb0 to 0xdc05dff8) [ 2.965569] dfa0: 00000000 00000000 00000000 00000000 [ 2.973768] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 2.981965] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 2.988596] r5:c0a4a5e0 r4:00000000 [ 2.992188] ---[ end trace 346e26f708876edd ]--- [ 2.997420] ------------[ cut here ]------------ In order to fix the problem UART4/5 registration needs to happen only on i.MX50 and i.MX53. So let mx51_clocks_init() register only UART1-3 and mx50_clocks_init()/mx53_clocks_init register all the UART1-5 ports. Fixes: 59dc3d8c8673 ("clk: imx51: uart4, uart5 gates only exist on imx50, imx53") Signed-off-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Shawn Guo --- drivers/clk/imx/clk-imx51-imx53.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/clk/imx/clk-imx51-imx53.c b/drivers/clk/imx/clk-imx51-imx53.c index c864992e6983..caa8bd40692c 100644 --- a/drivers/clk/imx/clk-imx51-imx53.c +++ b/drivers/clk/imx/clk-imx51-imx53.c @@ -131,7 +131,17 @@ static const char *ieee1588_sels[] = { "pll3_sw", "pll4_sw", "dummy" /* usbphy2_ static struct clk *clk[IMX5_CLK_END]; static struct clk_onecell_data clk_data; -static struct clk ** const uart_clks[] __initconst = { +static struct clk ** const uart_clks_mx51[] __initconst = { + &clk[IMX5_CLK_UART1_IPG_GATE], + &clk[IMX5_CLK_UART1_PER_GATE], + &clk[IMX5_CLK_UART2_IPG_GATE], + &clk[IMX5_CLK_UART2_PER_GATE], + &clk[IMX5_CLK_UART3_IPG_GATE], + &clk[IMX5_CLK_UART3_PER_GATE], + NULL +}; + +static struct clk ** const uart_clks_mx50_mx53[] __initconst = { &clk[IMX5_CLK_UART1_IPG_GATE], &clk[IMX5_CLK_UART1_PER_GATE], &clk[IMX5_CLK_UART2_IPG_GATE], @@ -321,8 +331,6 @@ static void __init mx5_clocks_common_init(void __iomem *ccm_base) clk_prepare_enable(clk[IMX5_CLK_TMAX1]); clk_prepare_enable(clk[IMX5_CLK_TMAX2]); /* esdhc2, fec */ clk_prepare_enable(clk[IMX5_CLK_TMAX3]); /* esdhc1, esdhc4 */ - - imx_register_uart_clocks(uart_clks); } static void __init mx50_clocks_init(struct device_node *np) @@ -388,6 +396,8 @@ static void __init mx50_clocks_init(struct device_node *np) r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000); clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r); + + imx_register_uart_clocks(uart_clks_mx50_mx53); } CLK_OF_DECLARE(imx50_ccm, "fsl,imx50-ccm", mx50_clocks_init); @@ -477,6 +487,8 @@ static void __init mx51_clocks_init(struct device_node *np) val = readl(MXC_CCM_CLPCR); val |= 1 << 23; writel(val, MXC_CCM_CLPCR); + + imx_register_uart_clocks(uart_clks_mx51); } CLK_OF_DECLARE(imx51_ccm, "fsl,imx51-ccm", mx51_clocks_init); @@ -606,5 +618,7 @@ static void __init mx53_clocks_init(struct device_node *np) r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000); clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r); + + imx_register_uart_clocks(uart_clks_mx50_mx53); } CLK_OF_DECLARE(imx53_ccm, "fsl,imx53-ccm", mx53_clocks_init); -- cgit v1.2.3 From ba6887836178d43b3665b9da075c2c5dfe1d207c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 22 Feb 2018 10:02:49 -0800 Subject: ARM: OMAP: Fix dmtimer init for omap1 We need to enable PM runtime on omap1 also as otherwise we will get errors: omap_timer omap_timer.1: omap_dm_timer_probe: pm_runtime_get_sync failed! omap_timer: probe of omap_timer.1 failed with error -13 ... We are checking for OMAP_TIMER_NEEDS_RESET flag elsewhere so this is safe to do. Cc: Aaro Koskinen Cc: Keerthy Cc: Ladislav Michl Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/dmtimer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index d443e481c3e9..8805a59bae53 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -888,11 +888,8 @@ static int omap_dm_timer_probe(struct platform_device *pdev) timer->irq = irq->start; timer->pdev = pdev; - /* Skip pm_runtime_enable for OMAP1 */ - if (!(timer->capability & OMAP_TIMER_NEEDS_RESET)) { - pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); - } + pm_runtime_enable(dev); + pm_runtime_irq_safe(dev); if (!timer->reserved) { ret = pm_runtime_get_sync(dev); -- cgit v1.2.3 From accd4f36a7d11c2d54544007eb65e10604dcf2f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Feb 2018 08:12:42 -0800 Subject: percpu: add a schedule point in pcpu_balance_workfn() When a large BPF percpu map is destroyed, I have seen pcpu_balance_workfn() holding cpu for hundreds of milliseconds. On KASAN config and 112 hyperthreads, average time to destroy a chunk is ~4 ms. [ 2489.841376] destroy chunk 1 in 4148689 ns ... [ 2490.093428] destroy chunk 32 in 4072718 ns Signed-off-by: Eric Dumazet Signed-off-by: Tejun Heo --- mm/percpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/percpu.c b/mm/percpu.c index fa3f854634a1..36e7b65ba6cf 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1610,6 +1610,7 @@ static void pcpu_balance_workfn(struct work_struct *work) spin_unlock_irq(&pcpu_lock); } pcpu_destroy_chunk(chunk); + cond_resched(); } /* -- cgit v1.2.3 From 3b821409632ab778d46e807516b457dfa72736ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 20:47:17 -0500 Subject: lock_parent() needs to recheck if dentry got __dentry_kill'ed under it In case when dentry passed to lock_parent() is protected from freeing only by the fact that it's on a shrink list and trylock of parent fails, we could get hit by __dentry_kill() (and subsequent dentry_kill(parent)) between unlocking dentry and locking presumed parent. We need to recheck that dentry is alive once we lock both it and parent *and* postpone rcu_read_unlock() until after that point. Otherwise we could return a pointer to struct dentry that already is rcu-scheduled for freeing, with ->d_lock held on it; caller's subsequent attempt to unlock it can end up with memory corruption. Cc: stable@vger.kernel.org # 3.12+, counting backports Signed-off-by: Al Viro --- fs/dcache.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7c38f39958bc..32aaab21e648 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -647,11 +647,16 @@ again: spin_unlock(&parent->d_lock); goto again; } - rcu_read_unlock(); - if (parent != dentry) + if (parent != dentry) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - else + if (unlikely(dentry->d_lockref.count < 0)) { + spin_unlock(&parent->d_lock); + parent = NULL; + } + } else { parent = NULL; + } + rcu_read_unlock(); return parent; } -- cgit v1.2.3 From b91e146c38b003c899710ede6d05fc824675e386 Mon Sep 17 00:00:00 2001 From: Richard Lai Date: Sat, 17 Feb 2018 16:28:24 +0000 Subject: iio: chemical: ccs811: Corrected firmware boot/application mode transition CCS811 has different I2C register maps in boot and application mode. When CCS811 is in boot mode, register APP_START (0xF4) is used to transit the firmware state from boot to application mode. However, APP_START is not a valid register location when CCS811 is in application mode (refer to "CCS811 Bootloader Register Map" and "CCS811 Application Register Map" in CCS811 datasheet). The driver should not attempt to perform a write to APP_START while CCS811 is in application mode, as this is not a valid or documented register location. When prob function is being called, the driver assumes the CCS811 sensor is in boot mode, and attempts to perform a write to APP_START. Although CCS811 powers-up in boot mode, it may have already been transited to application mode by previous instances, e.g. unload and reload device driver by the system, or explicitly by user. Depending on the system design, CCS811 sensor may be permanently connected to system power source rather than power controlled by GPIO, hence it is possible that the sensor is never power reset, thus the firmware could be in either boot or application mode at any given time when driver prob function is being called. This patch checks the STATUS register before attempting to send a write to APP_START. Only if the firmware is not in application mode and has valid firmware application loaded, then it will continue to start transiting the firmware boot to application mode. Signed-off-by: Richard Lai Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/ccs811.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index fbe2431f5b81..1ea9f5513b02 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -133,6 +133,9 @@ static int ccs811_start_sensor_application(struct i2c_client *client) if (ret < 0) return ret; + if ((ret & CCS811_STATUS_FW_MODE_APPLICATION)) + return 0; + if ((ret & CCS811_STATUS_APP_VALID_MASK) != CCS811_STATUS_APP_VALID_LOADED) return -EIO; -- cgit v1.2.3 From 4e4f9fbc5620a060dc7d3a651cdfb001c1d7c2f9 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 23 Feb 2018 13:50:55 +0100 Subject: iio: adc: stm32-dfsdm: fix compatible data use Fix use of compatible data: stm32h7 regmap configuration is statically used. Rather use regmap_cfg from compatible data. Fixes: bed73904e76f ("IIO: ADC: add stm32 DFSDM core support") Signed-off-by: Fabrice Gasnier Acked-by: Arnaud Pouliquen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 6290332cfd3f..0635f9390b20 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -274,7 +274,7 @@ static int stm32_dfsdm_probe(struct platform_device *pdev) dfsdm->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "dfsdm", dfsdm->base, - &stm32h7_dfsdm_regmap_cfg); + dev_data->regmap_cfg); if (IS_ERR(dfsdm->regmap)) { ret = PTR_ERR(dfsdm->regmap); dev_err(&pdev->dev, "%s: Failed to allocate regmap: %d\n", -- cgit v1.2.3 From c278609bdde9df330b7fe3d61ec1bc1a8cac4a78 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 23 Feb 2018 13:50:56 +0100 Subject: iio: adc: stm32-dfsdm: fix call to stop channel stm32_dfsdm_stop_channel must be called with channel id, not filter id. Fixes: e2e6771c6462 ("IIO: ADC: add STM32 DFSDM sigma delta ADC support") Signed-off-by: Fabrice Gasnier Acked-by: Arnaud Pouliquen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index daa026d6a94f..0eff8119119c 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -464,7 +464,7 @@ stop_channels: regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), DFSDM_CR1_RCONT_MASK, 0); - stm32_dfsdm_stop_channel(adc->dfsdm, adc->fl_id); + stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id); return ret; } -- cgit v1.2.3 From 179858efd94f49945d41919c1d09a8e17c2afa98 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 23 Feb 2018 13:50:57 +0100 Subject: iio: adc: stm32-dfsdm: fix clock source selection Add missing clock source selection. In case "audio" clock is provided, it's unused currently: "dfsdm" clock is wrongly used by default. Fixes: bed73904e76f ("IIO: ADC: add stm32 DFSDM core support") Signed-off-by: Fabrice Gasnier Acked-by: Arnaud Pouliquen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 0635f9390b20..e50efdcc41ff 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -83,7 +83,7 @@ int stm32_dfsdm_start_dfsdm(struct stm32_dfsdm *dfsdm) { struct dfsdm_priv *priv = container_of(dfsdm, struct dfsdm_priv, dfsdm); struct device *dev = &priv->pdev->dev; - unsigned int clk_div = priv->spi_clk_out_div; + unsigned int clk_div = priv->spi_clk_out_div, clk_src; int ret; if (atomic_inc_return(&priv->n_active_ch) == 1) { @@ -100,6 +100,14 @@ int stm32_dfsdm_start_dfsdm(struct stm32_dfsdm *dfsdm) } } + /* select clock source, e.g. 0 for "dfsdm" or 1 for "audio" */ + clk_src = priv->aclk ? 1 : 0; + ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0), + DFSDM_CHCFGR1_CKOUTSRC_MASK, + DFSDM_CHCFGR1_CKOUTSRC(clk_src)); + if (ret < 0) + goto disable_aclk; + /* Output the SPI CLKOUT (if clk_div == 0 clock if OFF) */ ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0), DFSDM_CHCFGR1_CKOUTDIV_MASK, -- cgit v1.2.3 From 0645af1b6988467b8362aaf44fb4013abb045bee Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 23 Feb 2018 13:50:58 +0100 Subject: iio: adc: stm32-dfsdm: fix multiple channel initialization When several channels are registered (e.g. via st,adc-channels property): - channels array is wrongly filled in. Only 1st element in array is being initialized with last registered channel. Fix it by passing reference to relevant channel (e.g. array[index]). - only last initialized channel can work properly (e.g. unique 'ch_id' is used). Converting any other channel result in conversion timeout. Fix it by getting rid of 'ch_id', use chan->channel instead. Signed-off-by: Fabrice Gasnier Acked-by: Arnaud Pouliquen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 0eff8119119c..01422d11753c 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -54,7 +54,6 @@ struct stm32_dfsdm_adc { struct stm32_dfsdm *dfsdm; const struct stm32_dfsdm_dev_data *dev_data; unsigned int fl_id; - unsigned int ch_id; /* ADC specific */ unsigned int oversamp; @@ -384,7 +383,7 @@ static ssize_t dfsdm_adc_audio_set_spiclk(struct iio_dev *indio_dev, { struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id]; - struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[adc->ch_id]; + struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[chan->channel]; unsigned int sample_freq = adc->sample_freq; unsigned int spi_freq; int ret; @@ -419,18 +418,20 @@ static ssize_t dfsdm_adc_audio_set_spiclk(struct iio_dev *indio_dev, return len; } -static int stm32_dfsdm_start_conv(struct stm32_dfsdm_adc *adc, bool dma) +static int stm32_dfsdm_start_conv(struct stm32_dfsdm_adc *adc, + const struct iio_chan_spec *chan, + bool dma) { struct regmap *regmap = adc->dfsdm->regmap; int ret; unsigned int dma_en = 0, cont_en = 0; - ret = stm32_dfsdm_start_channel(adc->dfsdm, adc->ch_id); + ret = stm32_dfsdm_start_channel(adc->dfsdm, chan->channel); if (ret < 0) return ret; ret = stm32_dfsdm_filter_configure(adc->dfsdm, adc->fl_id, - adc->ch_id); + chan->channel); if (ret < 0) goto stop_channels; @@ -464,12 +465,13 @@ stop_channels: regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), DFSDM_CR1_RCONT_MASK, 0); - stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id); + stm32_dfsdm_stop_channel(adc->dfsdm, chan->channel); return ret; } -static void stm32_dfsdm_stop_conv(struct stm32_dfsdm_adc *adc) +static void stm32_dfsdm_stop_conv(struct stm32_dfsdm_adc *adc, + const struct iio_chan_spec *chan) { struct regmap *regmap = adc->dfsdm->regmap; @@ -482,7 +484,7 @@ static void stm32_dfsdm_stop_conv(struct stm32_dfsdm_adc *adc) regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), DFSDM_CR1_RCONT_MASK, 0); - stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id); + stm32_dfsdm_stop_channel(adc->dfsdm, chan->channel); } static int stm32_dfsdm_set_watermark(struct iio_dev *indio_dev, @@ -609,6 +611,7 @@ static int stm32_dfsdm_adc_dma_start(struct iio_dev *indio_dev) static int stm32_dfsdm_postenable(struct iio_dev *indio_dev) { struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + const struct iio_chan_spec *chan = &indio_dev->channels[0]; int ret; /* Reset adc buffer index */ @@ -618,7 +621,7 @@ static int stm32_dfsdm_postenable(struct iio_dev *indio_dev) if (ret < 0) return ret; - ret = stm32_dfsdm_start_conv(adc, true); + ret = stm32_dfsdm_start_conv(adc, chan, true); if (ret) { dev_err(&indio_dev->dev, "Can't start conversion\n"); goto stop_dfsdm; @@ -635,7 +638,7 @@ static int stm32_dfsdm_postenable(struct iio_dev *indio_dev) return 0; err_stop_conv: - stm32_dfsdm_stop_conv(adc); + stm32_dfsdm_stop_conv(adc, chan); stop_dfsdm: stm32_dfsdm_stop_dfsdm(adc->dfsdm); @@ -645,11 +648,12 @@ stop_dfsdm: static int stm32_dfsdm_predisable(struct iio_dev *indio_dev) { struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + const struct iio_chan_spec *chan = &indio_dev->channels[0]; if (adc->dma_chan) dmaengine_terminate_all(adc->dma_chan); - stm32_dfsdm_stop_conv(adc); + stm32_dfsdm_stop_conv(adc, chan); stm32_dfsdm_stop_dfsdm(adc->dfsdm); @@ -730,7 +734,7 @@ static int stm32_dfsdm_single_conv(struct iio_dev *indio_dev, if (ret < 0) goto stop_dfsdm; - ret = stm32_dfsdm_start_conv(adc, false); + ret = stm32_dfsdm_start_conv(adc, chan, false); if (ret < 0) { regmap_update_bits(adc->dfsdm->regmap, DFSDM_CR2(adc->fl_id), DFSDM_CR2_REOCIE_MASK, DFSDM_CR2_REOCIE(0)); @@ -751,7 +755,7 @@ static int stm32_dfsdm_single_conv(struct iio_dev *indio_dev, else ret = IIO_VAL_INT; - stm32_dfsdm_stop_conv(adc); + stm32_dfsdm_stop_conv(adc, chan); stop_dfsdm: stm32_dfsdm_stop_dfsdm(adc->dfsdm); @@ -765,7 +769,7 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, { struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id]; - struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[adc->ch_id]; + struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[chan->channel]; unsigned int spi_freq = adc->spi_freq; int ret = -EINVAL; @@ -972,7 +976,6 @@ static int stm32_dfsdm_adc_chan_init_one(struct iio_dev *indio_dev, } ch->scan_type.realbits = 24; ch->scan_type.storagebits = 32; - adc->ch_id = ch->channel; return stm32_dfsdm_chan_configure(adc->dfsdm, &adc->dfsdm->ch_list[ch->channel]); @@ -1001,7 +1004,7 @@ static int stm32_dfsdm_audio_init(struct iio_dev *indio_dev) } ch->info_mask_separate = BIT(IIO_CHAN_INFO_SAMP_FREQ); - d_ch = &adc->dfsdm->ch_list[adc->ch_id]; + d_ch = &adc->dfsdm->ch_list[ch->channel]; if (d_ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL) adc->spi_freq = adc->dfsdm->spi_master_freq; @@ -1042,8 +1045,8 @@ static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev) return -ENOMEM; for (chan_idx = 0; chan_idx < num_ch; chan_idx++) { - ch->scan_index = chan_idx; - ret = stm32_dfsdm_adc_chan_init_one(indio_dev, ch); + ch[chan_idx].scan_index = chan_idx; + ret = stm32_dfsdm_adc_chan_init_one(indio_dev, &ch[chan_idx]); if (ret < 0) { dev_err(&indio_dev->dev, "Channels init failed\n"); return ret; -- cgit v1.2.3 From 6de2aeb5746e79b50d2b99bd63f08da880f735a3 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 23 Feb 2018 12:07:21 +0100 Subject: dt-bindings: iio: adc: sd-modulator: fix io-channel-cells io-channel-cells should be <0> since sigma delta modulator exports only one channel, as described in ../iio/iio-bindings.txt "IIO providers" section. Only the phandle is necessary for IIO consumers in this case. Fixes: af11143757b7 ("IIO: Add DT bindings for sigma delta adc modulator") Signed-off-by: Fabrice Gasnier Acked-by: Arnaud Pouliquen Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt index e9ebb8a20e0d..ba24ca7ba95e 100644 --- a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt +++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt @@ -3,11 +3,11 @@ Device-Tree bindings for sigma delta modulator Required properties: - compatible: should be "ads1201", "sd-modulator". "sd-modulator" can be use as a generic SD modulator if modulator not specified in compatible list. -- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers". +- #io-channel-cells = <0>: See the IIO bindings section "IIO consumers". Example node: ads1202: adc@0 { compatible = "sd-modulator"; - #io-channel-cells = <1>; + #io-channel-cells = <0>; }; -- cgit v1.2.3 From 78dc897b7ee67205423dbbc6b56be49fb18d15b5 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 22 Feb 2018 14:28:59 -0600 Subject: rtlwifi: rtl8723be: Fix loss of signal In commit c713fb071edc ("rtlwifi: rtl8821ae: Fix connection lost problem correctly") a problem in rtl8821ae that caused loss of signal was fixed. That same problem has now been reported for rtl8723be. Accordingly, the ASPM L1 latency has been increased from 0 to 7 to fix the instability. Signed-off-by: Larry Finger Cc: Stable Tested-by: James Cameron Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index f9ccd13c79f9..e7bbbc95cdb1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -1125,7 +1125,8 @@ static void _rtl8723be_enable_aspm_back_door(struct ieee80211_hw *hw) /* Configuration Space offset 0x70f BIT7 is used to control L0S */ tmp8 = _rtl8723be_dbi_read(rtlpriv, 0x70f); - _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7)); + _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7) | + ASPM_L1_LATENCY << 3); /* Configuration Space offset 0x719 Bit3 is for L1 * BIT4 is for clock request -- cgit v1.2.3 From 015555fd4d2930bc0c86952c46ad88b3392f66e4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:55:54 +0000 Subject: fs: dcache: Avoid livelock between d_alloc_parallel and __d_add If d_alloc_parallel runs concurrently with __d_add, it is possible for d_alloc_parallel to continuously retry whilst i_dir_seq has been incremented to an odd value by __d_add: CPU0: __d_add n = start_dir_add(dir); cmpxchg(&dir->i_dir_seq, n, n + 1) == n CPU1: d_alloc_parallel retry: seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; hlist_bl_lock(b); bit_spin_lock(0, (unsigned long *)b); // Always succeeds CPU0: __d_lookup_done(dentry) hlist_bl_lock bit_spin_lock(0, (unsigned long *)b); // Never succeeds CPU1: if (unlikely(parent->d_inode->i_dir_seq != seq)) { hlist_bl_unlock(b); goto retry; } Since the simple bit_spin_lock used to implement hlist_bl_lock does not provide any fairness guarantees, then CPU1 can starve CPU0 of the lock and prevent it from reaching end_dir_add(dir), therefore CPU1 cannot exit its retry loop because the sequence number always has the bottom bit set. This patch resolves the livelock by not taking hlist_bl_lock in d_alloc_parallel if the sequence counter is odd, since any subsequent masked comparison with i_dir_seq will fail anyway. Cc: Peter Zijlstra Cc: Al Viro Reported-by: Naresh Madhusudana Acked-by: Peter Zijlstra (Intel) Reviewed-by: Matthew Wilcox Signed-off-by: Will Deacon Signed-off-by: Al Viro --- fs/dcache.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 32aaab21e648..bde3b6662601 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2479,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, retry: rcu_read_lock(); - seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; + seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { @@ -2500,6 +2500,12 @@ retry: rcu_read_unlock(); goto retry; } + + if (unlikely(seq & 1)) { + rcu_read_unlock(); + goto retry; + } + hlist_bl_lock(b); if (unlikely(parent->d_inode->i_dir_seq != seq)) { hlist_bl_unlock(b); -- cgit v1.2.3 From 8cc07c808c9d595e81cbe5aad419b7769eb2e5c9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:55:55 +0000 Subject: fs: dcache: Use READ_ONCE when accessing i_dir_seq i_dir_seq is subject to concurrent modification by a cmpxchg or store-release operation, so ensure that the relaxed access in d_alloc_parallel uses READ_ONCE. Reported-by: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index bde3b6662601..8945e6cabd93 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2507,7 +2507,7 @@ retry: } hlist_bl_lock(b); - if (unlikely(parent->d_inode->i_dir_seq != seq)) { + if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; -- cgit v1.2.3 From 5a3386790a172cf738194e1574f631cd43c6140a Mon Sep 17 00:00:00 2001 From: Yong Deng Date: Mon, 26 Feb 2018 10:43:52 +0800 Subject: ASoC: sun4i-i2s: Fix RX slot number of SUN8I I2S's RX slot number of SUN8I should be shifted 4 bit to left. Fixes: 7d2993811a1e ("ASoC: sun4i-i2s: Add support for H3") Signed-off-by: Yong Deng Reviewed-by: Chen-Yu Tsai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sunxi/sun4i-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index dca1143c1150..a4aa931ebfae 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -104,7 +104,7 @@ #define SUN8I_I2S_CHAN_CFG_REG 0x30 #define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM_MASK GENMASK(6, 4) -#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) (chan - 1) +#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) ((chan - 1) << 4) #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM_MASK GENMASK(2, 0) #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM(chan) (chan - 1) -- cgit v1.2.3 From 084a804e01205bcd74cd0849bc72cb5c88f8e648 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 27 Feb 2018 12:41:41 +0200 Subject: usb: dwc3: Fix lock-up on ID change during system suspend/resume To reproduce the lock up do the following - connect otg host adapter and a USB device to the dual-role port so that it is in host mode. - suspend to mem. - disconnect otg adapter. - resume the system. If we call dwc3_host_exit() before tasks are thawed xhci_plat_remove() seems to lock up at the second usb_remove_hcd() call. To work around this we queue the _dwc3_set_mode() work on the system_freezable_wq. Fixes: 41ce1456e1db ("usb: dwc3: core: make dwc3_set_mode() work properly") Cc: # v4.12+ Suggested-by: Manu Gautam Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index f1d838a4acd6..e94bf91cc58a 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -175,7 +175,7 @@ void dwc3_set_mode(struct dwc3 *dwc, u32 mode) dwc->desired_dr_role = mode; spin_unlock_irqrestore(&dwc->lock, flags); - queue_work(system_power_efficient_wq, &dwc->drd_work); + queue_work(system_freezable_wq, &dwc->drd_work); } u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) -- cgit v1.2.3 From d7789f5bcdb298c4a302db471b1b20f74a20de95 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 28 Feb 2018 10:31:10 +0000 Subject: ASoC: wm_adsp: For TLV controls only register TLV get/set Normal 512-byte get/set of a TLV isn't supported but we were registering the normal get/set anyway and relying on omitting the SNDRV_CTL_ELEM_ACCESS_[READ|WRITE] flags to prevent them being called. Trouble is if this gets broken in the core ALSA code - as it has been since at least 4.14 - the standard get/set can be called unexpectedly and corrupt memory. There's no point providing functions that won't be called and it's a trivial change. The benefit is that if the ALSA core gets broken again we get a big fat immediate NULL dereference instead of a memory corruption timebomb. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 66e32f5d2917..989d093abda7 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1204,12 +1204,14 @@ static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl) kcontrol->put = wm_coeff_put_acked; break; default: - kcontrol->get = wm_coeff_get; - kcontrol->put = wm_coeff_put; - - ctl->bytes_ext.max = ctl->len; - ctl->bytes_ext.get = wm_coeff_tlv_get; - ctl->bytes_ext.put = wm_coeff_tlv_put; + if (kcontrol->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + ctl->bytes_ext.max = ctl->len; + ctl->bytes_ext.get = wm_coeff_tlv_get; + ctl->bytes_ext.put = wm_coeff_tlv_put; + } else { + kcontrol->get = wm_coeff_get; + kcontrol->put = wm_coeff_put; + } break; } -- cgit v1.2.3 From 28b0f8a6962a24ed21737578f3b1b07424635c9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Feb 2018 07:38:08 -0800 Subject: tty: make n_tty_read() always abort if hangup is in progress A tty is hung up by __tty_hangup() setting file->f_op to hung_up_tty_fops, which is skipped on ttys whose write operation isn't tty_write(). This means that, for example, /dev/console whose write op is redirected_tty_write() is never actually marked hung up. Because n_tty_read() uses the hung up status to decide whether to abort the waiting readers, the lack of hung-up marking can lead to the following scenario. 1. A session contains two processes. The leader and its child. The child ignores SIGHUP. 2. The leader exits and starts disassociating from the controlling terminal (/dev/console). 3. __tty_hangup() skips setting f_op to hung_up_tty_fops. 4. SIGHUP is delivered and ignored. 5. tty_ldisc_hangup() is invoked. It wakes up the waits which should clear the read lockers of tty->ldisc_sem. 6. The reader wakes up but because tty_hung_up_p() is false, it doesn't abort and goes back to sleep while read-holding tty->ldisc_sem. 7. The leader progresses to tty_ldisc_lock() in tty_ldisc_hangup() and is now stuck in D sleep indefinitely waiting for tty->ldisc_sem. The following is Alan's explanation on why some ttys aren't hung up. http://lkml.kernel.org/r/20171101170908.6ad08580@alans-desktop 1. It broke the serial consoles because they would hang up and close down the hardware. With tty_port that *should* be fixable properly for any cases remaining. 2. The console layer was (and still is) completely broken and doens't refcount properly. So if you turn on console hangups it breaks (as indeed does freeing consoles and half a dozen other things). As neither can be fixed quickly, this patch works around the problem by introducing a new flag, TTY_HUPPING, which is used solely to tell n_tty_read() that hang-up is in progress for the console and the readers should be aborted regardless of the hung-up status of the device. The following is a sample hung task warning caused by this issue. INFO: task agetty:2662 blocked for more than 120 seconds. Not tainted 4.11.3-dbg-tty-lockup-02478-gfd6c7ee-dirty #28 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 0 2662 1 0x00000086 Call Trace: __schedule+0x267/0x890 schedule+0x36/0x80 schedule_timeout+0x23c/0x2e0 ldsem_down_write+0xce/0x1f6 tty_ldisc_lock+0x16/0x30 tty_ldisc_hangup+0xb3/0x1b0 __tty_hangup+0x300/0x410 disassociate_ctty+0x6c/0x290 do_exit+0x7ef/0xb00 do_group_exit+0x3f/0xa0 get_signal+0x1b3/0x5d0 do_signal+0x28/0x660 exit_to_usermode_loop+0x46/0x86 do_syscall_64+0x9c/0xb0 entry_SYSCALL64_slow_path+0x25/0x25 The following is the repro. Run "$PROG /dev/console". The parent process hangs in D state. #include #include #include #include #include #include #include #include #include #include #include #include int main(int argc, char **argv) { struct sigaction sact = { .sa_handler = SIG_IGN }; struct timespec ts1s = { .tv_sec = 1 }; pid_t pid; int fd; if (argc < 2) { fprintf(stderr, "test-hung-tty /dev/$TTY\n"); return 1; } /* fork a child to ensure that it isn't already the session leader */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { /* top parent, wait for everyone */ while (waitpid(-1, NULL, 0) >= 0) ; if (errno != ECHILD) perror("waitpid"); return 0; } /* new session, start a new session and set the controlling tty */ if (setsid() < 0) { perror("setsid"); return 1; } fd = open(argv[1], O_RDWR); if (fd < 0) { perror("open"); return 1; } if (ioctl(fd, TIOCSCTTY, 1) < 0) { perror("ioctl"); return 1; } /* fork a child, sleep a bit and exit */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { nanosleep(&ts1s, NULL); printf("Session leader exiting\n"); exit(0); } /* * The child ignores SIGHUP and keeps reading from the controlling * tty. Because SIGHUP is ignored, the child doesn't get killed on * parent exit and the bug in n_tty makes the read(2) block the * parent's control terminal hangup attempt. The parent ends up in * D sleep until the child is explicitly killed. */ sigaction(SIGHUP, &sact, NULL); printf("Child reading tty\n"); while (1) { char buf[1024]; if (read(fd, buf, sizeof(buf)) < 0) { perror("read"); return 1; } } return 0; } Signed-off-by: Tejun Heo Cc: Alan Cox Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 6 ++++++ drivers/tty/tty_io.c | 9 +++++++++ include/linux/tty.h | 1 + 3 files changed, 16 insertions(+) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 5c0e59e8fe46..cbe98bc2b998 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2180,6 +2180,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } if (tty_hung_up_p(file)) break; + /* + * Abort readers for ttys which never actually + * get hung up. See __tty_hangup(). + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + break; if (!timeout) break; if (file->f_flags & O_NONBLOCK) { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index eb9133b472f4..63114ea35ec1 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -586,6 +586,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) return; } + /* + * Some console devices aren't actually hung up for technical and + * historical reasons, which can lead to indefinite interruptible + * sleep in n_tty_read(). The following explicitly tells + * n_tty_read() to abort readers. + */ + set_bit(TTY_HUPPING, &tty->flags); + /* inuse_filps is protected by the single tty lock, this really needs to change if we want to flush the workqueue with the lock held */ @@ -640,6 +648,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) * from the ldisc side, which is now guaranteed. */ set_bit(TTY_HUPPED, &tty->flags); + clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) diff --git a/include/linux/tty.h b/include/linux/tty.h index 0a6c71e0ad01..47f8af22f216 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -364,6 +364,7 @@ struct tty_file_private { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ -- cgit v1.2.3 From fd63a8903a2c40425a9811c3371dd4d0f42c0ad3 Mon Sep 17 00:00:00 2001 From: Jonas Danielsson Date: Mon, 29 Jan 2018 12:39:15 +0100 Subject: tty/serial: atmel: add new version check for usart On our at91sam9260 based board the usart0 and usart1 ports report their versions (ATMEL_US_VERSION) as 0x10302. This version is not included in the current checks in the driver. Signed-off-by: Jonas Danielsson Acked-by: Richard Genoud Acked-by: Nicolas Ferre Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index df46a9e88c34..e287fe8f10fc 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1734,6 +1734,7 @@ static void atmel_get_ip_name(struct uart_port *port) switch (version) { case 0x302: case 0x10213: + case 0x10302: dev_dbg(port->dev, "This version is usart\n"); atmel_port->has_frac_baudrate = true; atmel_port->has_hw_timer = true; -- cgit v1.2.3 From e7f3e99cb1a667d04d60d02957fbed58b50d4e5a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Feb 2018 20:39:13 +0200 Subject: serial: 8250_pci: Don't fail on multiport card class Do not fail on multiport cards in serial_pci_is_class_communication(). It restores behaviour for SUNIX multiport cards, that enumerated by class and have a custom board data. Moreover it allows users to reenumerate port-by-port from user space. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Reported-by: Nikola Ciprich Signed-off-by: Andy Shevchenko Tested-by: Nikola Ciprich Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 54adf8d56350..d580625acc79 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3387,11 +3387,9 @@ static int serial_pci_is_class_communication(struct pci_dev *dev) /* * If it is not a communications device or the programming * interface is greater than 6, give up. - * - * (Should we try to make guesses for multiport serial devices - * later?) */ if ((((dev->class >> 8) != PCI_CLASS_COMMUNICATION_SERIAL) && + ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MULTISERIAL) && ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MODEM)) || (dev->class & 0xff) > 6) return -ENODEV; @@ -3428,6 +3426,12 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; + /* + * Should we try to make guesses for multiport serial devices later? + */ + if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_MULTISERIAL) + return -ENODEV; + num_iomem = num_port = 0; for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { if (pci_resource_flags(dev, i) & IORESOURCE_IO) { -- cgit v1.2.3 From 714569064adee3c114a2a6490735b94abe269068 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 3 Feb 2018 12:27:23 +0100 Subject: serial: core: mark port as initialized in autoconfig This is a followup on 44117a1d1732 ("serial: core: mark port as initialized after successful IRQ change"). Nikola has been using autoconfig via setserial and reported a crash similar to what I fixed in the earlier mentioned commit. Here I do the same fixup for the autoconfig. I wasn't sure that this is the right approach. Nikola confirmed that it fixes his crash. Fixes: b3b576461864 ("tty: serial_core: convert uart_open to use tty_port_open") Link: http://lkml.kernel.org/r/20180131072000.GD1853@localhost.localdomain Reported-by: Nikola Ciprich Tested-by: Nikola Ciprich Cc: Signed-off-by: Sebastian Andrzej Siewior Tested-by: Nikola Ciprich Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c8dde56b532b..35b9201db3b4 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1144,6 +1144,8 @@ static int uart_do_autoconfig(struct tty_struct *tty,struct uart_state *state) uport->ops->config_port(uport, flags); ret = uart_startup(tty, state, 1); + if (ret == 0) + tty_port_set_initialized(port, true); if (ret > 0) ret = 0; } -- cgit v1.2.3 From 1f66dd36bb18437397ea0d7882c52f7e3c476e15 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 13 Feb 2018 17:09:08 +0800 Subject: earlycon: add reg-offset to physical address before mapping It will get the wrong virtual address because port->mapbase is not added the correct reg-offset yet. We have to update it before earlycon_map() is called Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann Cc: Peter Hurley Cc: stable@vger.kernel.org Fixes: 088da2a17619 ("of: earlycon: Initialize port fields from DT properties") Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/earlycon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 870e84fb6e39..a24278380fec 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -245,11 +245,12 @@ int __init of_setup_earlycon(const struct earlycon_id *match, } port->mapbase = addr; port->uartclk = BASE_BAUD * 16; - port->membase = earlycon_map(port->mapbase, SZ_4K); val = of_get_flat_dt_prop(node, "reg-offset", NULL); if (val) port->mapbase += be32_to_cpu(*val); + port->membase = earlycon_map(port->mapbase, SZ_4K); + val = of_get_flat_dt_prop(node, "reg-shift", NULL); if (val) port->regshift = be32_to_cpu(*val); -- cgit v1.2.3 From 9f2068f35729948bde84d87a40d135015911345d Mon Sep 17 00:00:00 2001 From: Nikola Ciprich Date: Tue, 13 Feb 2018 15:04:46 +0100 Subject: serial: 8250_pci: Add Brainboxes UC-260 4 port serial device Add PCI ids for two variants of Brainboxes UC-260 quad port PCI serial cards. Suggested-by: Andy Shevchenko Signed-off-by: Nikola Ciprich Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index d580625acc79..a93f77ab3da0 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4702,6 +4702,17 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* + * BrainBoxes UC-260 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0D21, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0E34, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, /* * Perle PCI-RAS cards */ -- cgit v1.2.3 From 7842055bfce4bf0170d0f61df8b2add8399697be Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Thu, 15 Feb 2018 13:02:27 +0100 Subject: serial: sh-sci: prevent lockup on full TTY buffers When the TTY buffers fill up to the configured maximum, a system lockup occurs: [ 598.820128] INFO: rcu_preempt detected stalls on CPUs/tasks: [ 598.825796] 0-...!: (1 GPs behind) idle=5a6/2/0 softirq=1974/1974 fqs=1 [ 598.832577] (detected by 3, t=62517 jiffies, g=296, c=295, q=126) [ 598.838755] Task dump for CPU 0: [ 598.841977] swapper/0 R running task 0 0 0 0x00000022 [ 598.849023] Call trace: [ 598.851476] __switch_to+0x98/0xb0 [ 598.854870] (null) This can be prevented by doing a dummy read of the RX data register. This issue affects both HSCIF and SCIF ports. Reported for R-Car H3 ES2.0; reproduced and fixed on H3 ES1.1. Probably affects other R-Car platforms as well. Reported-by: Yoshihiro Shimoda Signed-off-by: Ulrich Hecht Reviewed-by: Geert Uytterhoeven Cc: stable Tested-by: Nguyen Viet Dung Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 7257c078e155..44adf9db38f8 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -885,6 +885,8 @@ static void sci_receive_chars(struct uart_port *port) /* Tell the rest of the system the news. New characters! */ tty_flip_buffer_push(tport); } else { + /* TTY buffers full; read from RX reg to prevent lockup */ + serial_port_in(port, SCxRDR); serial_port_in(port, SCxSR); /* dummy read */ sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); } -- cgit v1.2.3 From 5d7f77ec72d10c421bc33958f06a5583f2d27ed6 Mon Sep 17 00:00:00 2001 From: phil eichinger Date: Mon, 19 Feb 2018 10:24:15 +0100 Subject: serial: imx: fix bogus dev_err MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only one of the two is really required, not both: * have_rtscts or * have_rtsgpio In imx_rs485_config() this is done correctly, so RS485 is working, just the error message is false. Signed-off-by: Phil Eichinger Reviewed-by: Fabio Estevam Fixes: b8f3bff057b0 ("serial: imx: Support common rs485 binding for RTS polarity" Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 1d7ca382bc12..a33c685af990 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2093,7 +2093,7 @@ static int serial_imx_probe(struct platform_device *pdev) uart_get_rs485_mode(&pdev->dev, &sport->port.rs485); if (sport->port.rs485.flags & SER_RS485_ENABLED && - (!sport->have_rtscts || !sport->have_rtsgpio)) + (!sport->have_rtscts && !sport->have_rtsgpio)) dev_err(&pdev->dev, "no RTS control, disabling rs485\n"); imx_rs485_config(&sport->port, &sport->port.rs485); -- cgit v1.2.3 From b8b549eec8187ac1b12075d69a2d84d89b5e811a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Feb 2018 09:23:19 +0100 Subject: xfrm: Fix ESN sequence number handling for IPsec GSO packets. When IPsec offloading was introduced, we accidentally incremented the sequence number counter on the xfrm_state by one packet too much in the ESN case. This leads to a sequence number gap of one packet after each GSO packet. Fix this by setting the sequence number to the correct value. Fixes: d7dbefc45cf5 ("xfrm: Add xfrm_replay_overflow functions for offloading") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 1d38c6acf8af..9e3a5e85f828 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff } else { XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; - xo->seq.low = oseq = oseq + 1; + xo->seq.low = oseq + 1; xo->seq.hi = oseq_hi; oseq += skb_shinfo(skb)->gso_segs; } -- cgit v1.2.3 From 2560da49de5d0cfec22e9564023aebfffa094732 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 27 Feb 2018 12:47:11 -0800 Subject: arm64: dts: rockchip: Fix rk3399-gru-* s2r (pinctrl hogs, wifi reset) Back in the early days when gru devices were still under development we found an issue where the WiFi reset line needed to be configured as early as possible during the boot process to avoid the WiFi module being in a bad state. We found that the way to get the kernel to do this in the earliest possible place was to configure this line in the pinctrl hogs, so that's what we did. For some history here you can see . After the time that change landed in the kernel, we landed a firmware change to configure this line even earlier. See . However, even after the firmware change landed we kept the kernel change to deal with the fact that some people working on devices might take a little while to update their firmware. At this there are definitely zero devices out in the wild that have firmware without the fix in it. Specifically looking in the firmware branch several critically important fixes for memory stability landed after the patch in coreboot and I know we didn't ship without those. Thus, by now, everyone should have the new firmware and it's safe to not have the kernel set this up in a pinctrl hog. Historically, even though it wasn't needed to have this in a pinctrl hog, we still kept it since it didn't hurt. Pinctrl would apply the default hog at bootup and then would never touch things again. That all changed with commit 981ed1bfbc6c ("pinctrl: Really force states during suspend/resume"). After that commit then we'll re-apply the default hog at resume time and that can screw up the reset state of WiFi. ...and on rk3399 if you touch a device on PCIe in the wrong way then the whole system can go haywire. That's what was happening. Specifically you'd resume a rk3399-gru-* device and it would mostly resume, then would crash with some crazy weird crash. One could say, perhaps, that the recent pinctrl change was at fault (and should be fixed) since it changed behavior. ...but that's not really true. The device tree for rk3399-gru is really to blame. Specifically since the pinctrl is defined in the hog and not in the "wlan-pd-n" node then the actual user of this pin doesn't have a pinctrl entry for it. That's bad. Let's fix our problems by just moving the control of "wlan_module_reset_l pinctrl" out of the hog and put them in the proper place. NOTE: in theory, I think it should actually be possible to have a pin controlled _both_ by the hog and by an actual device. Once the device claims the pin I think the hog is supposed to let go. I'm not 100% sure that this works and in any case this solution would be more complex than is necessary. Reported-by: Marc Zyngier Fixes: 48f4d9796d99 ("arm64: dts: rockchip: add Gru/Kevin DTS") Fixes: 981ed1bfbc6c ("pinctrl: Really force states during suspend/resume") Signed-off-by: Douglas Anderson Tested-by: Enric Balletbo i Serra Tested-by: Marc Zyngier Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 03f195025390..204bdb9857b9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -406,8 +406,9 @@ wlan_pd_n: wlan-pd-n { compatible = "regulator-fixed"; regulator-name = "wlan_pd_n"; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_module_reset_l>; - /* Note the wlan_module_reset_l pinctrl */ enable-active-high; gpio = <&gpio1 11 GPIO_ACTIVE_HIGH>; @@ -983,12 +984,6 @@ ap_i2c_audio: &i2c8 { pinctrl-0 = < &ap_pwroff /* AP will auto-assert this when in S3 */ &clk_32k /* This pin is always 32k on gru boards */ - - /* - * We want this driven low ASAP; firmware should help us, but - * we can help ourselves too. - */ - &wlan_module_reset_l >; pcfg_output_low: pcfg-output-low { @@ -1168,12 +1163,7 @@ ap_i2c_audio: &i2c8 { }; wlan_module_reset_l: wlan-module-reset-l { - /* - * We want this driven low ASAP (As {Soon,Strongly} As - * Possible), to avoid leakage through the powered-down - * WiFi. - */ - rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_output_low>; + rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_pull_none>; }; bt_host_wake_l: bt-host-wake-l { -- cgit v1.2.3 From cb57469c9573f6018cd1302953dd45d6e05aba7b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 16 Feb 2018 11:02:01 -0800 Subject: staging: android: ashmem: Fix lockdep issue during llseek ashmem_mutex create a chain of dependencies like so: (1) mmap syscall -> mmap_sem -> (acquired) ashmem_mmap ashmem_mutex (try to acquire) (block) (2) llseek syscall -> ashmem_llseek -> ashmem_mutex -> (acquired) inode_lock -> inode->i_rwsem (try to acquire) (block) (3) getdents -> iterate_dir -> inode_lock -> inode->i_rwsem (acquired) copy_to_user -> mmap_sem (try to acquire) There is a lock ordering created between mmap_sem and inode->i_rwsem causing a lockdep splat [2] during a syzcaller test, this patch fixes the issue by unlocking the mutex earlier. Functionally that's Ok since we don't need to protect vfs_llseek. [1] https://patchwork.kernel.org/patch/10185031/ [2] https://lkml.org/lkml/2018/1/10/48 Acked-by: Todd Kjos Cc: Arve Hjonnevag Cc: stable@vger.kernel.org Reported-by: syzbot+8ec30bb7bf1a981a2012@syzkaller.appspotmail.com Signed-off-by: Joel Fernandes Acked-by: Greg Hackmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 6dbba5aff191..d5450365769c 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -326,24 +326,23 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&ashmem_mutex); if (asma->size == 0) { - ret = -EINVAL; - goto out; + mutex_unlock(&ashmem_mutex); + return -EINVAL; } if (!asma->file) { - ret = -EBADF; - goto out; + mutex_unlock(&ashmem_mutex); + return -EBADF; } + mutex_unlock(&ashmem_mutex); + ret = vfs_llseek(asma->file, offset, origin); if (ret < 0) - goto out; + return ret; /** Copy f_pos from backing file, since f_ops->llseek() sets it */ file->f_pos = asma->file->f_pos; - -out: - mutex_unlock(&ashmem_mutex); return ret; } -- cgit v1.2.3 From f0e8c61110c2c85903b136ba070daf643a8b6842 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 28 Feb 2018 11:57:50 +0100 Subject: Bluetooth: btusb: Remove Yoga 920 from the btusb_needs_reset_resume_table Commit 1fdb92697469 ("Bluetooth: btusb: Use DMI matching for QCA reset_resume quirking"), added the Lenovo Yoga 920 to the btusb_needs_reset_resume_table. Testing has shown that this is a false positive and the problems where caused by issues with the initial fix: commit fd865802c66b ("Bluetooth: btusb: fix QCA Rome suspend/resume"), which has already been reverted. So the QCA Rome BT in the Yoga 920 does not need a reset-resume quirk at all and this commit removes it from the btusb_needs_reset_resume_table. Note that after this commit the btusb_needs_reset_resume_table is now empty. It is kept around on purpose, since this whole series of commits started for a reason and there are actually broken platforms around, which need to be added to it. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 1fdb92697469 ("Bluetooth: btusb: Use DMI matching for QCA ...") Cc: stable@vger.kernel.org Cc: Brian Norris Cc: Kai-Heng Feng Tested-by: Kevin Fenzi Suggested-by: Brian Norris Signed-off-by: Hans de Goede Reviewed-by: Brian Norris Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 60bf04b8f103..041c17e0c668 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -385,13 +385,6 @@ static const struct usb_device_id blacklist_table[] = { * the module itself. So we use a DMI list to match known broken platforms. */ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { - { - /* Lenovo Yoga 920 (QCA Rome device 0cf3:e300) */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 920"), - }, - }, {} }; -- cgit v1.2.3 From 0c6e526646c04ce31d4aaa280ed2237dd1cd774c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 1 Mar 2018 13:42:52 +0800 Subject: Bluetooth: btusb: Add Dell OptiPlex 3060 to btusb_needs_reset_resume_table The issue can be reproduced before commit fd865802c66b ("Bluetooth: btusb: fix QCA Rome suspend/resume") gets introduced, so the reset resume quirk is still needed for this system. T: Bus=01 Lev=01 Prnt=01 Port=13 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e007 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Cc: stable@vger.kernel.org Cc: Brian Norris Cc: Hans de Goede Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 041c17e0c668..382be00a8329 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -385,6 +385,13 @@ static const struct usb_device_id blacklist_table[] = { * the module itself. So we use a DMI list to match known broken platforms. */ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { + { + /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"), + }, + }, {} }; -- cgit v1.2.3 From 64e759f58f128730b97a3c3a26d283c075ad7c86 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 26 Feb 2018 15:41:53 +0100 Subject: Bluetooth: Fix missing encryption refresh on Security Request If Security Request is received on connection that is already encrypted with sufficient security master should perform encryption key refresh procedure instead of just ignoring Slave Security Request (Core Spec 5.0 Vol 3 Part H 2.4.6). > ACL Data RX: Handle 3585 flags 0x02 dlen 6 SMP: Security Request (0x0b) len 1 Authentication requirement: Bonding, No MITM, SC, No Keypresses (0x09) < HCI Command: LE Start Encryption (0x08|0x0019) plen 28 Handle: 3585 Random number: 0x0000000000000000 Encrypted diversifier: 0x0000 Long term key: 44264272a5c426a9e868f034cf0e69f3 > HCI Event: Command Status (0x0f) plen 4 LE Start Encryption (0x08|0x0019) ncmd 1 Status: Success (0x00) > HCI Event: Encryption Key Refresh Complete (0x30) plen 3 Status: Success (0x00) Handle: 3585 Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 01117ae84f1d..a2ddae2f37d7 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) else sec_level = authreq_to_seclevel(auth); - if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) + if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) { + /* If link is already encrypted with sufficient security we + * still need refresh encryption as per Core Spec 5.0 Vol 3, + * Part H 2.4.6 + */ + smp_ltk_encrypt(conn, hcon->sec_level); return 0; + } if (sec_level > hcon->pending_sec_level) hcon->pending_sec_level = sec_level; -- cgit v1.2.3 From 6f54120e17e311fd7ac42b9ec2a0611caa5b46ad Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 28 Feb 2018 09:11:10 +0800 Subject: ata: do not schedule hot plug if it is a sas host We've got a kernel panic when using sata disk with sas controller: [115946.152283] Unable to handle kernel NULL pointer dereference at virtual address 000007d8 [115946.223963] CPU: 0 PID: 22175 Comm: kworker/0:1 Tainted: G W OEL 4.14.0 #1 [115946.232925] Workqueue: events ata_scsi_hotplug [115946.237938] task: ffff8021ee50b180 task.stack: ffff00000d5d0000 [115946.244717] PC is at sas_find_dev_by_rphy+0x44/0x114 [115946.250224] LR is at sas_find_dev_by_rphy+0x3c/0x114 ...... [115946.355701] Process kworker/0:1 (pid: 22175, stack limit = 0xffff00000d5d0000) [115946.363369] Call trace: [115946.456356] [] sas_find_dev_by_rphy+0x44/0x114 [115946.462908] [] sas_target_alloc+0x20/0x5c [115946.469408] [] scsi_alloc_target+0x250/0x308 [115946.475781] [] __scsi_add_device+0xb0/0x154 [115946.481991] [] ata_scsi_scan_host+0x180/0x218 [115946.488367] [] ata_scsi_hotplug+0xb0/0xcc [115946.494801] [] process_one_work+0x144/0x390 [115946.501115] [] worker_thread+0x144/0x418 [115946.507093] [] kthread+0x10c/0x138 [115946.512792] [] ret_from_fork+0x10/0x18 We found that Ding Xiang has reported a similar bug before: https://patchwork.kernel.org/patch/9179817/ And this bug still exists in mainline. Since libsas handles hotplug and device adding/removing itself, do not need to schedule ata hot plug task here if it is a sas host. Signed-off-by: Jason Yan Cc: Ding Xiang Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 11c3137d7b0a..c016829a38fd 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -815,7 +815,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; - else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) + else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && + !(ap->flags & ATA_FLAG_SAS_HOST)) schedule_delayed_work(&ap->hotplug_task, 0); if (ap->pflags & ATA_PFLAG_RECOVERED) -- cgit v1.2.3 From 835a1d5cdefffe08bec46efff11daea9b389baa9 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 2 Mar 2018 08:36:31 +0100 Subject: Revert "arm64: dts: rockchip: add usb3-phy otg-port support for rk3399" This reverts commit c301b327aea898af558b2387252a2f5fc0117dee. While this works splendidly on rk3399-gru devices using the cros-ec extcon, other rk3399-based devices using the fusb302 or no power-delivery controller at all don't probe at all anymore, as the typec-phy currently always expects the extcon to be available and therefore defers probing indefinitly on these. Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 2605118d4b4c..0b81ca1d07e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -411,8 +411,8 @@ reg = <0x0 0xfe800000 0x0 0x100000>; interrupts = ; dr_mode = "otg"; - phys = <&u2phy0_otg>, <&tcphy0_usb3>; - phy-names = "usb2-phy", "usb3-phy"; + phys = <&u2phy0_otg>; + phy-names = "usb2-phy"; phy_type = "utmi_wide"; snps,dis_enblslpm_quirk; snps,dis-u2-freeclk-exists-quirk; @@ -444,8 +444,8 @@ reg = <0x0 0xfe900000 0x0 0x100000>; interrupts = ; dr_mode = "otg"; - phys = <&u2phy1_otg>, <&tcphy1_usb3>; - phy-names = "usb2-phy", "usb3-phy"; + phys = <&u2phy1_otg>; + phy-names = "usb2-phy"; phy_type = "utmi_wide"; snps,dis_enblslpm_quirk; snps,dis-u2-freeclk-exists-quirk; -- cgit v1.2.3 From 7f8ae00f63725e835b5ed8a97bc18bf1c950acb2 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 27 Dec 2017 15:21:18 +0200 Subject: iwlwifi: Cancel and set MARKER_CMD timer during suspend-resume While entering to D3 mode there is a gap between the time the driver handles the D3_CONFIG_CMD response to the time the host is going to sleep. In between there might be cases which MARKER_CMD can tailgate. Also during resume flow the MARKER_CMD might get sent while D0I3_CMD is being handled in the FW. Cancel MARKER_CMD timer and set it again properly during suspend resume flows to prevent this command from being sent accidentlly. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/debugfs.h | 18 ++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/init.c | 12 ++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 4 ++++ drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 8 ++++++++ 4 files changed, 42 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h index e57ff92a68ae..3da468d2cc92 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h @@ -75,6 +75,20 @@ static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) cancel_delayed_work_sync(&fwrt->timestamp.wk); } +static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) +{ + cancel_delayed_work_sync(&fwrt->timestamp.wk); +} + +static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) +{ + if (!fwrt->timestamp.delay) + return; + + schedule_delayed_work(&fwrt->timestamp.wk, + round_jiffies_relative(fwrt->timestamp.delay)); +} + #else static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt, struct dentry *dbgfs_dir) @@ -84,4 +98,8 @@ static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt, static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) {} +static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) {} + +static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) {} + #endif /* CONFIG_IWLWIFI_DEBUGFS */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index 45f21acbd842..2efac307909e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,3 +76,15 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); + +void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt) +{ + iwl_fw_suspend_timestamp(fwrt); +} +IWL_EXPORT_SYMBOL(iwl_fw_runtime_suspend); + +void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt) +{ + iwl_fw_resume_timestamp(fwrt); +} +IWL_EXPORT_SYMBOL(iwl_fw_runtime_resume); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index e25c049f980f..f3197f7c13b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -150,6 +150,10 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt); +void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt); + +void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt); + static inline void iwl_fw_set_current_image(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type cur_fw_img) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e6cf39285f4..2efe9b099556 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1098,6 +1098,8 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* make sure the d0i3 exit work is not pending */ flush_work(&mvm->d0i3_exit_work); + iwl_fw_runtime_suspend(&mvm->fwrt); + ret = iwl_trans_suspend(trans); if (ret) return ret; @@ -2012,6 +2014,8 @@ int iwl_mvm_resume(struct ieee80211_hw *hw) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; + iwl_fw_runtime_resume(&mvm->fwrt); + return ret; } @@ -2038,6 +2042,8 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3; + iwl_fw_runtime_suspend(&mvm->fwrt); + /* start pseudo D3 */ rtnl_lock(); err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true); @@ -2098,6 +2104,8 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) __iwl_mvm_resume(mvm, true); rtnl_unlock(); + iwl_fw_runtime_resume(&mvm->fwrt); + mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; iwl_abort_notification_waits(&mvm->notif_wait); -- cgit v1.2.3 From de04d4fbf87b769ab18c480e4f020c53e74bbdd2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 2 Jan 2018 11:40:15 +0200 Subject: iwlwifi: mvm: fix TX of CCMP 256 We don't have enough room in the TX command for a CCMP 256 key, and need to use key from table. Fixes: 3264bf032bd9 ("[BUGFIX] iwlwifi: mvm: Fix CCMP IV setting") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dda77b327c98..57ad6019ffad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -419,11 +419,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, { struct ieee80211_key_conf *keyconf = info->control.hw_key; u8 *crypto_hdr = skb_frag->data + hdrlen; + enum iwl_tx_cmd_sec_ctrl type = TX_CMD_SEC_CCM; u64 pn; switch (keyconf->cipher) { case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: iwl_mvm_set_tx_cmd_ccmp(info, tx_cmd); iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; @@ -447,13 +447,16 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: + type = TX_CMD_SEC_GCMP; + /* Fall through */ + case WLAN_CIPHER_SUITE_CCMP_256: /* TODO: Taking the key from the table might introduce a race * when PTK rekeying is done, having an old packets with a PN * based on the old key but the message encrypted with a new * one. * Need to handle this. */ - tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE; + tx_cmd->sec_ctl |= type | TX_CMD_SEC_KEY_FROM_TABLE; tx_cmd->key[0] = keyconf->hw_key_idx; iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; -- cgit v1.2.3 From 40d53f4a60c9eb10d4fa58066c23ba1af8a59e39 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Thu, 4 Jan 2018 17:39:08 +0200 Subject: iwlwifi: mvm: Fix channel switch for count 0 and 1 It was assumed that apply_time==0 implies immediate scheduling, which is wrong. Instead, the fw expects the START_IMMEDIATELY flag to be set. Otherwise, this resulted in 0x3063 assert. Fix that. While at it rename the T2_V2_START_IMMEDIATELY to TE_V2_START_IMMEDIATELY. Fixes: f5d8f50f271d ("iwlwifi: mvm: Fix channel switch in case of count <= 1") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h index 3721a3ed358b..f824bebceb06 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h @@ -211,7 +211,7 @@ enum { * @TE_V2_NOTIF_HOST_FRAG_END:request/receive notification on frag end * @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use. * @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use. - * @T2_V2_START_IMMEDIATELY: start time event immediately + * @TE_V2_START_IMMEDIATELY: start time event immediately * @TE_V2_DEP_OTHER: depends on another time event * @TE_V2_DEP_TSF: depends on a specific time * @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of tha same MAC @@ -230,7 +230,7 @@ enum iwl_time_event_policy { TE_V2_NOTIF_HOST_FRAG_END = BIT(5), TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6), TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7), - T2_V2_START_IMMEDIATELY = BIT(11), + TE_V2_START_IMMEDIATELY = BIT(11), /* placement characteristics */ TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 200ab50ec86b..acb217e666db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -616,7 +616,7 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm, time_cmd.repeat = 1; time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_NOTIF_HOST_EVENT_END | - T2_V2_START_IMMEDIATELY); + TE_V2_START_IMMEDIATELY); if (!wait_for_notif) { iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); @@ -803,7 +803,7 @@ int iwl_mvm_start_p2p_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif, time_cmd.repeat = 1; time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_NOTIF_HOST_EVENT_END | - T2_V2_START_IMMEDIATELY); + TE_V2_START_IMMEDIATELY); return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); } @@ -913,6 +913,8 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm, time_cmd.interval = cpu_to_le32(1); time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_ABSENCE); + if (!apply_time) + time_cmd.policy |= cpu_to_le16(TE_V2_START_IMMEDIATELY); return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); } -- cgit v1.2.3 From 63dd5d022f4766e6b05ee611124afcc7cbfbb953 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 7 Jan 2018 14:30:49 +0200 Subject: iwlwifi: mvm: fix assert 0x2B00 on older FWs We should add the multicast station before adding the broadcast station. However, in older FW, the firmware will start beaconing when we add the multicast station, and since the broadcast station is not added at this point so the transmission of the beacon will fail on assert 0x2b00. This is fixed in later firmware, so make the order of addition depend on the TLV. Fixes: 26d6c16bed53 ("iwlwifi: mvm: add multicast station") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 45 ++++++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 08de822c3ef0..ebf511150f4d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -2106,15 +2107,40 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw, if (ret) goto out_remove; - ret = iwl_mvm_add_mcast_sta(mvm, vif); - if (ret) - goto out_unbind; - - /* Send the bcast station. At this stage the TBTT and DTIM time events - * are added and applied to the scheduler */ - ret = iwl_mvm_send_add_bcast_sta(mvm, vif); - if (ret) - goto out_rm_mcast; + /* + * This is not very nice, but the simplest: + * For older FWs adding the mcast sta before the bcast station may + * cause assert 0x2b00. + * This is fixed in later FW so make the order of removal depend on + * the TLV + */ + if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) { + ret = iwl_mvm_add_mcast_sta(mvm, vif); + if (ret) + goto out_unbind; + /* + * Send the bcast station. At this stage the TBTT and DTIM time + * events are added and applied to the scheduler + */ + ret = iwl_mvm_send_add_bcast_sta(mvm, vif); + if (ret) { + iwl_mvm_rm_mcast_sta(mvm, vif); + goto out_unbind; + } + } else { + /* + * Send the bcast station. At this stage the TBTT and DTIM time + * events are added and applied to the scheduler + */ + iwl_mvm_send_add_bcast_sta(mvm, vif); + if (ret) + goto out_unbind; + iwl_mvm_add_mcast_sta(mvm, vif); + if (ret) { + iwl_mvm_send_rm_bcast_sta(mvm, vif); + goto out_unbind; + } + } /* must be set before quota calculations */ mvmvif->ap_ibss_active = true; @@ -2144,7 +2170,6 @@ out_quota_failed: iwl_mvm_power_update_mac(mvm); mvmvif->ap_ibss_active = false; iwl_mvm_send_rm_bcast_sta(mvm, vif); -out_rm_mcast: iwl_mvm_rm_mcast_sta(mvm, vif); out_unbind: iwl_mvm_binding_remove_vif(mvm, vif); -- cgit v1.2.3 From 8745f12a6600dd9d31122588621d4c8ddb332cd7 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 11 Jan 2018 16:18:46 +0200 Subject: iwlwifi: avoid collecting firmware dump if not loaded Trying to collect firmware debug data while firmware is not loaded causes various errors (e.g. failing NIC access). This causes even a bigger issue if at that time the HW radio is off. In that case, when later turning the radio on, the Driver fails to read the HW (registers contain garbage values). (It may be that the CSR_GP_CNTRL_REG_FLAG_RFKILL_WAKE_L1A_EN bit is cleared on faulty NIC access - since the same behavior was seen in HW RFKILL toggling before setting that bit.) Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 13 +++++++++++-- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 3 +++ drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 5 ++--- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 8 ++++++++ 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 67aefc8fc9ac..7bd704a3e640 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -8,6 +8,7 @@ * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -942,7 +944,6 @@ dump_trans_data: out: iwl_fw_free_dump_desc(fwrt); - fwrt->dump.trig = NULL; clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status); IWL_DEBUG_INFO(fwrt, "WRT dump done\n"); } @@ -1112,6 +1113,14 @@ void iwl_fw_error_dump_wk(struct work_struct *work) fwrt->ops->dump_start(fwrt->ops_ctx)) return; + if (fwrt->ops && fwrt->ops->fw_running && + !fwrt->ops->fw_running(fwrt->ops_ctx)) { + IWL_ERR(fwrt, "Firmware not running - cannot dump error\n"); + iwl_fw_free_dump_desc(fwrt); + clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status); + goto out; + } + if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { /* stop recording */ iwl_fw_dbg_stop_recording(fwrt); @@ -1145,7 +1154,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, out_ctrl); } } - +out: if (fwrt->ops && fwrt->ops->dump_end) fwrt->ops->dump_end(fwrt->ops_ctx); } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 223fb77a3aa9..72259bff9922 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -8,6 +8,7 @@ * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -91,6 +93,7 @@ static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt) if (fwrt->dump.desc != &iwl_dump_desc_assert) kfree(fwrt->dump.desc); fwrt->dump.desc = NULL; + fwrt->dump.trig = NULL; } void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index f3197f7c13b6..3fb940ebd74a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -26,6 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -68,6 +70,7 @@ struct iwl_fw_runtime_ops { int (*dump_start)(void *ctx); void (*dump_end)(void *ctx); + bool (*fw_running)(void *ctx); }; #define MAX_NUM_LMAC 2 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index a7892c1254a2..9c436d8d001d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -35,6 +36,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1281,9 +1283,6 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm, { int ret; - if (!iwl_mvm_firmware_running(mvm)) - return -EIO; - ret = iwl_mvm_ref_sync(mvm, IWL_MVM_REF_PRPH_WRITE); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 6bb347bf0d6e..ab7fb5aad984 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -35,6 +36,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -552,9 +554,15 @@ static void iwl_mvm_fwrt_dump_end(void *ctx) iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT); } +static bool iwl_mvm_fwrt_fw_running(void *ctx) +{ + return iwl_mvm_firmware_running(ctx); +} + static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = { .dump_start = iwl_mvm_fwrt_dump_start, .dump_end = iwl_mvm_fwrt_dump_end, + .fw_running = iwl_mvm_fwrt_fw_running, }; static struct iwl_op_mode * -- cgit v1.2.3 From e4f13ad07823b24a1537518d2163bd164292fb10 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Jan 2018 13:50:59 +0200 Subject: iwlwifi: mvm: fix "failed to remove key" message When the GTK is installed, we install it to HW with the station ID of the AP. Mac80211 will try to remove it only after the AP sta is removed, which will result in a failure to remove key since we do not have any station for it. This is a valid situation, but a previous commit removed the early return and added a return with error value, which resulted in an error message that is confusing to users. Remove the error return value. Fixes: 85aeb58cec1a ("iwlwifi: mvm: Enable security on new TX API") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dbd2ba2bb714..3739e42b34e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3170,8 +3170,9 @@ static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id, int ret, size; u32 status; + /* This is a valid situation for GTK removal */ if (sta_id == IWL_MVM_INVALID_STA) - return -EINVAL; + return 0; key_flags = cpu_to_le16((keyconf->keyidx << STA_KEY_FLG_KEYID_POS) & STA_KEY_FLG_KEYID_MSK); -- cgit v1.2.3 From 7c305de2b9548ab6b65fce342c78618bbed5db73 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 22 Jan 2018 08:55:06 +0200 Subject: iwlwifi: mvm: Direct multicast frames to the correct station Multicast frames for NL80211_IFTYPE_AP and NL80211_IFTYPE_ADHOC were directed to the broadcast station, however, as the broadcast station did not have keys configured, these frames were sent unencrypted. Fix this by using the multicast station which is the station for which encryption keys are configured. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 57ad6019ffad..af6dfceab6b8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -648,7 +648,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || info.control.vif->type == NL80211_IFTYPE_AP || info.control.vif->type == NL80211_IFTYPE_ADHOC) { - sta_id = mvmvif->bcast_sta.sta_id; + if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE) + sta_id = mvmvif->bcast_sta.sta_id; + else + sta_id = mvmvif->mcast_sta.sta_id; + queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); if (queue < 0) -- cgit v1.2.3 From 6508de0305d560235b366cc2cc98f7bcfb029e92 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 25 Jan 2018 15:22:41 +0200 Subject: iwlwifi: mvm: Correctly set the tid for mcast queue In the scheduler config command, the meaning of tid == 0xf was intended to indicate the configuration is for management frames. However, tid == 0xf was also used for the multicast queue that was meant only for multicast data frames, which resulted with the FW not encrypting multicast data frames. As multicast frames do not have a QoS header, fix this by setting tid == 0, to indicate that this is a data queue and not management one. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 3739e42b34e4..630e23cb0ffb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2039,7 +2039,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = msta->sta_id, - .tid = IWL_MAX_TID_COUNT, + .tid = 0, .aggregate = false, .frame_limit = IWL_FRAME_LIMIT, }; @@ -2090,7 +2090,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (iwl_mvm_has_new_tx_api(mvm)) { int queue = iwl_mvm_tvqm_enable_txq(mvm, vif->cab_queue, msta->sta_id, - IWL_MAX_TID_COUNT, + 0, timeout); mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, @@ -2115,7 +2115,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0); iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue, - IWL_MAX_TID_COUNT, 0); + 0, 0); ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) -- cgit v1.2.3 From de655fa8fb237d0eaf838e8833b464c1dec90070 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:30 +0100 Subject: iwlwifi: fix malformed CONFIG_IWLWIFI_PCIE_RTPM default 'default false' should be 'default n', though they happen to have the same effect here, due to undefined symbols ('false' in this case) evaluating to n in a tristate sense. Remove the default instead of changing it. bool and tristate symbols implicitly default to n. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index c5f2ddf9b0fe..e5a2fc738ac3 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -91,7 +91,6 @@ config IWLWIFI_BCAST_FILTERING config IWLWIFI_PCIE_RTPM bool "Enable runtime power management mode for PCIe devices" depends on IWLMVM && PM && EXPERT - default false help Say Y here to enable runtime power management for PCIe devices. If enabled, the device will go into low power mode -- cgit v1.2.3 From d02f51cbcf12b09ab945873e35046045875eed9a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 3 Mar 2018 03:03:46 +0100 Subject: bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs SCTP GSO skbs have a gso_size of GSO_BY_FRAGS, so any sort of unconditionally mangling of that will result in nonsense value and would corrupt the skb later on. Therefore, i) add two helpers skb_increase_gso_size() and skb_decrease_gso_size() that would throw a one time warning and bail out for such skbs and ii) refuse and return early with an error in those BPF helpers that are affected. We do need to bail out as early as possible from there before any changes on the skb have been performed. Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Co-authored-by: Daniel Borkmann Signed-off-by: Daniel Axtens Cc: Marcelo Ricardo Leitner Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- Documentation/networking/segmentation-offloads.txt | 11 +++- include/linux/skbuff.h | 22 ++++++++ net/core/filter.c | 60 +++++++++++++++------- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index d47480b61ac6..23a8dd91a9ec 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -153,8 +153,15 @@ To signal this, gso_size is set to the special value GSO_BY_FRAGS. Therefore, any code in the core networking stack must be aware of the possibility that gso_size will be GSO_BY_FRAGS and handle that case -appropriately. (For size checks, the skb_gso_validate_*_len family of -helpers do this automatically.) +appropriately. + +There are a couple of helpers to make this easier: + + - For size checks, the skb_gso_validate_*_len family of helpers correctly + considers GSO_BY_FRAGS. + + - For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size + will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bdcf583..8c67c33f40c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4038,6 +4038,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +/* Note: Should be called only if skb_is_gso(skb) is true */ +static inline bool skb_is_gso_sctp(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; +} + static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; @@ -4045,6 +4051,22 @@ static inline void skb_gso_reset(struct sk_buff *skb) skb_shinfo(skb)->gso_type = 0; } +static inline void skb_increase_gso_size(struct skb_shared_info *shinfo, + u16 increment) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size += increment; +} + +static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, + u16 decrement) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size -= decrement; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 0c121adbdbaa..48aa7c7320db 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2087,6 +2087,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2096,19 +2100,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV4 needs to be changed into * SKB_GSO_TCPV6. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + if (shinfo->gso_type & SKB_GSO_TCPV4) { + shinfo->gso_type &= ~SKB_GSO_TCPV4; + shinfo->gso_type |= SKB_GSO_TCPV6; } /* Due to IPv6 header, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IPV6); @@ -2123,6 +2129,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2132,19 +2142,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV6 needs to be changed into * SKB_GSO_TCPV4. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (shinfo->gso_type & SKB_GSO_TCPV6) { + shinfo->gso_type &= ~SKB_GSO_TCPV6; + shinfo->gso_type |= SKB_GSO_TCPV4; } /* Due to IPv4 header, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IP); @@ -2243,6 +2255,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2252,11 +2268,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header grow, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; @@ -2267,6 +2285,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2276,11 +2298,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header shrink, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; -- cgit v1.2.3 From 74c12c630fe310eb7fcae1b292257d47781fff0a Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sun, 4 Mar 2018 13:08:17 +0100 Subject: batman-adv: Fix multicast packet loss with a single WANT_ALL_IPV4/6 flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the kernel doc describes too the code is supposed to skip adding multicast TT entries if both the WANT_ALL_IPV4 and WANT_ALL_IPV6 flags are present. Unfortunately, the current code even skips adding multicast TT entries if only either the WANT_ALL_IPV4 or WANT_ALL_IPV6 is present. This could lead to IPv6 multicast packet loss if only an IGMP but not an MLD querier is present for instance or vice versa. Fixes: 687937ab3489 ("batman-adv: Add multicast optimization support for bridged setups") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cbdeb47ec3f6..d70640135e3a 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -543,8 +543,8 @@ update: bat_priv->mcast.enabled = true; } - return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); + return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 && + mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6); } /** -- cgit v1.2.3 From 28b2182dad43f6f8fcbd167539a26714fd12bd64 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Mar 2018 11:36:32 +0100 Subject: ahci: Add PCI-id for the Highpoint Rocketraid 644L card Like the Highpoint Rocketraid 642L and cards using a Marvel 88SE9235 controller in general, this RAID card also supports AHCI mode and short of a custom driver, this is the only way to make it work under Linux. Note that even though the card is called to 644L, it has a product-id of 0x0645. Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1534106 Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Acked-by: Bjorn Helgaas --- drivers/ata/ahci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 355a95a83a34..1ff17799769d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -550,7 +550,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { .driver_data = board_ahci_yes_fbs }, { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230), .driver_data = board_ahci_yes_fbs }, - { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), + { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), /* highpoint rocketraid 642L */ + .driver_data = board_ahci_yes_fbs }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0645), /* highpoint rocketraid 644L */ .driver_data = board_ahci_yes_fbs }, /* Promise */ -- cgit v1.2.3 From 1903be8222b7c278ca897c129ce477c1dd6403a8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Mar 2018 11:36:33 +0100 Subject: PCI: Add function 1 DMA alias quirk for Highpoint RocketRAID 644L The Highpoint RocketRAID 644L uses a Marvel 88SE9235 controller, as with other Marvel controllers this needs a function 1 DMA alias quirk. Note the RocketRAID 642L uses the same Marvel 88SE9235 controller and already is listed with a function 1 DMA alias quirk. Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1534106 Signed-off-by: Hans de Goede Acked-by: Bjorn Helgaas Signed-off-by: Tejun Heo --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index fc734014206f..b1a3a36073b4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3901,6 +3901,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645, + quirk_dma_func1_alias); /* https://bugs.gentoo.org/show_bug.cgi?id=497630 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB388_ESD, -- cgit v1.2.3 From c7151602255a36ba07c84fe2baeef846fdb988b8 Mon Sep 17 00:00:00 2001 From: Evgeniy Didin Date: Wed, 28 Feb 2018 14:53:18 +0300 Subject: mmc: dw_mmc: Fix the DTO/CTO timeout overflow calculation for 32-bit systems The commit 9d9491a7da2a ("mmc: dw_mmc: Fix the DTO timeout calculation") and commit 4c2357f57dd5 ("mmc: dw_mmc: Fix the CTO timeout calculation") made changes, which cause multiply overflow for 32-bit systems. The broken timeout calculations leads to unexpected ETIMEDOUT errors and causes stacktrace splat (such as below) during normal data exchange with SD-card. | Running : 4M-check-reassembly-tcp-cmykw2-rotatew2.out -v0 -w1 | - Info: Finished target initialization. | mmcblk0: error -110 transferring data, sector 320544, nr 2048, cmd | response 0x900, card status 0x0 DIV_ROUND_UP_ULL helps to escape usage of __udivdi3() from libgcc and so code gets compiled on all 32-bit platforms as opposed to usage of DIV_ROUND_UP when we may only compile stuff on a very few arches. Lets cast this multiply to u64 type to prevent the overflow. Fixes: 9d9491a7da2a ("mmc: dw_mmc: Fix the DTO timeout calculation") Fixes: 4c2357f57dd5 ("mmc: dw_mmc: Fix the CTO timeout calculation") Tested-by: Vineet Gupta Reported-by: Vineet Gupta # ARC STAR 9001306872 HSDK, sdio: board crashes when copying big files Signed-off-by: Evgeniy Didin Cc: # 4.14 Reviewed-by: Andy Shevchenko Reviewed-by: Douglas Anderson Reviewed-by: Shawn Lin Reviewed-by: Jisheng Zhang Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index d9b4acefed31..545550591389 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -413,7 +413,9 @@ static inline void dw_mci_set_cto(struct dw_mci *host) cto_div = (mci_readl(host, CLKDIV) & 0xff) * 2; if (cto_div == 0) cto_div = 1; - cto_ms = DIV_ROUND_UP(MSEC_PER_SEC * cto_clks * cto_div, host->bus_hz); + + cto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * cto_clks * cto_div, + host->bus_hz); /* add a bit spare time */ cto_ms += 10; @@ -1948,8 +1950,9 @@ static void dw_mci_set_drto(struct dw_mci *host) drto_div = (mci_readl(host, CLKDIV) & 0xff) * 2; if (drto_div == 0) drto_div = 1; - drto_ms = DIV_ROUND_UP(MSEC_PER_SEC * drto_clks * drto_div, - host->bus_hz); + + drto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * drto_clks * drto_div, + host->bus_hz); /* add a bit spare time */ drto_ms += 10; -- cgit v1.2.3 From 1a087f032111a88e826877449dfb93ceb22b78b9 Mon Sep 17 00:00:00 2001 From: Xinyong Date: Fri, 2 Mar 2018 19:20:07 +0800 Subject: usb: gadget: f_fs: Fix use-after-free in ffs_fs_kill_sb() When I debug a kernel crash issue in funcitonfs, found ffs_data.ref overflowed, While functionfs is unmounting, ffs_data is put twice. Commit 43938613c6fd ("drivers, usb: convert ffs_data.ref from atomic_t to refcount_t") can avoid refcount overflow, but that is risk some situations. So no need put ffs data in ffs_fs_kill_sb, already put in ffs_data_closed. The issue can be reproduced in Mediatek mt6763 SoC, ffs for ADB device. KASAN enabled configuration reports use-after-free errro. BUG: KASAN: use-after-free in refcount_dec_and_test+0x14/0xe0 at addr ffffffc0579386a0 Read of size 4 by task umount/4650 ==================================================== BUG kmalloc-512 (Tainted: P W O ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in ffs_fs_mount+0x194/0x844 age=22856 cpu=2 pid=566 alloc_debug_processing+0x1ac/0x1e8 ___slab_alloc.constprop.63+0x640/0x648 __slab_alloc.isra.57.constprop.62+0x24/0x34 kmem_cache_alloc_trace+0x1a8/0x2bc ffs_fs_mount+0x194/0x844 mount_fs+0x6c/0x1d0 vfs_kern_mount+0x50/0x1b4 do_mount+0x258/0x1034 INFO: Freed in ffs_data_put+0x25c/0x320 age=0 cpu=3 pid=4650 free_debug_processing+0x22c/0x434 __slab_free+0x2d8/0x3a0 kfree+0x254/0x264 ffs_data_put+0x25c/0x320 ffs_data_closed+0x124/0x15c ffs_fs_kill_sb+0xb8/0x110 deactivate_locked_super+0x6c/0x98 deactivate_super+0xb0/0xbc INFO: Object 0xffffffc057938600 @offset=1536 fp=0x (null) ...... Call trace: [] dump_backtrace+0x0/0x250 [] show_stack+0x14/0x1c [] dump_stack+0xa0/0xc8 [] print_trailer+0x158/0x260 [] object_err+0x3c/0x40 [] kasan_report_error+0x2a8/0x754 [] kasan_report+0x5c/0x60 [] __asan_load4+0x70/0x88 [] refcount_dec_and_test+0x14/0xe0 [] ffs_data_put+0x80/0x320 [] ffs_fs_kill_sb+0xc8/0x110 [] deactivate_locked_super+0x6c/0x98 [] deactivate_super+0xb0/0xbc [] cleanup_mnt+0x64/0xec [] __cleanup_mnt+0x10/0x18 [] task_work_run+0xcc/0x124 [] do_notify_resume+0x60/0x70 [] work_pending+0x10/0x14 Cc: stable@vger.kernel.org Signed-off-by: Xinyong Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c2592d883f67..d2428a9e8900 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1538,7 +1538,6 @@ ffs_fs_kill_sb(struct super_block *sb) if (sb->s_fs_info) { ffs_release_dev(sb->s_fs_info); ffs_data_closed(sb->s_fs_info); - ffs_data_put(sb->s_fs_info); } } -- cgit v1.2.3 From 4c437920fa216f66f6a5d469cae2a0360cc2d9c7 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 1 Mar 2018 11:05:34 +0100 Subject: dt-bindings: usb: fix the STM32F7 DWC2 OTG HS core binding This patch fixes binding documentation for DWC2 controller in HS mode found on STMicroelectronics STM32F7 SoC. The v2 former patch [1] had been acked by Rob Herring, but v1 was merged. [1] https://patchwork.kernel.org/patch/9925575/ Fixes: 000777dadc7e ("dt-bindings: usb: Document the STM32F7xx DWC2 ...") Signed-off-by: Amelie Delaunay Reviewed-by: Rob Herring Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/dwc2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt index e64d903bcbe8..46da5f184460 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.txt +++ b/Documentation/devicetree/bindings/usb/dwc2.txt @@ -19,7 +19,7 @@ Required properties: configured in FS mode; - "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs configured in HS mode; - - "st,stm32f7xx-hsotg": The DWC2 USB HS controller instance in STM32F7xx SoCs + - "st,stm32f7-hsotg": The DWC2 USB HS controller instance in STM32F7 SoCs configured in HS mode; - reg : Should contain 1 register range (address and length) - interrupts : Should contain 1 interrupt -- cgit v1.2.3 From 1a149e3554e0324a3d551dfb327bdb67b150a320 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 1 Mar 2018 11:05:35 +0100 Subject: usb: dwc2: fix STM32F7 USB OTG HS compatible This patch fixes compatible for STM32F7 USB OTG HS and consistently rename dw2_set_params function. The v2 former patch [1] had been acked by Paul Young, but v1 was merged. [1] https://patchwork.kernel.org/patch/9925573/ Fixes: d8fae8b93682 ("usb: dwc2: add support for STM32F7xx USB OTG HS") Signed-off-by: Amelie Delaunay Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 03fd20f0b496..c4a47496d2fb 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -137,7 +137,7 @@ static void dwc2_set_stm32f4x9_fsotg_params(struct dwc2_hsotg *hsotg) p->activate_stm_fs_transceiver = true; } -static void dwc2_set_stm32f7xx_hsotg_params(struct dwc2_hsotg *hsotg) +static void dwc2_set_stm32f7_hsotg_params(struct dwc2_hsotg *hsotg) { struct dwc2_core_params *p = &hsotg->params; @@ -164,8 +164,8 @@ const struct of_device_id dwc2_of_match_table[] = { { .compatible = "st,stm32f4x9-fsotg", .data = dwc2_set_stm32f4x9_fsotg_params }, { .compatible = "st,stm32f4x9-hsotg" }, - { .compatible = "st,stm32f7xx-hsotg", - .data = dwc2_set_stm32f7xx_hsotg_params }, + { .compatible = "st,stm32f7-hsotg", + .data = dwc2_set_stm32f7_hsotg_params }, {}, }; MODULE_DEVICE_TABLE(of, dwc2_of_match_table); -- cgit v1.2.3 From 54f02945f703404cdf17c9618316b3d3387fa072 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Feb 2018 17:16:02 +0900 Subject: usb: renesas_usbhs: add binding for r8a77965 This patch adds binding for r8a77965 (R-Car M3-N). Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/renesas_usbhs.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt index d060172f1529..43960faf5a88 100644 --- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt +++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt @@ -12,6 +12,7 @@ Required properties: - "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device - "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device - "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device + - "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device - "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device - "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device - "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices -- cgit v1.2.3 From c6ba5084ce0d00d4a005b0577d9e764d39b638e1 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Feb 2018 17:16:03 +0900 Subject: usb: gadget: udc: renesas_usb3: add binging for r8a77965 This patch adds binding for r8a77965 (R-Car M3-N). Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/renesas_usb3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt index 87a45e2f9b7f..2c071bb5801e 100644 --- a/Documentation/devicetree/bindings/usb/renesas_usb3.txt +++ b/Documentation/devicetree/bindings/usb/renesas_usb3.txt @@ -4,6 +4,7 @@ Required properties: - compatible: Must contain one of the following: - "renesas,r8a7795-usb3-peri" - "renesas,r8a7796-usb3-peri" + - "renesas,r8a77965-usb3-peri" - "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 compatible device -- cgit v1.2.3 From 2fe4c22c53fc2e3f35be2cd0033cb3d15ebd41b1 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 27 Feb 2018 08:03:56 -0500 Subject: media: rc: lirc does not use LIRC_CAN_SEND_SCANCODE feature Since commit 02d742f4b209 ("media: lirc: lirc daemon fails to detect raw IR device"), the feature LIRC_CAN_SEND_SCANCODE is no longer used as it tripped up lircd. The ability to send scancodes for IR Tx is implied by LIRC_CAN_SEND_PULSE (i.e. any device that can send can use IR Tx encoders). So, remove LIRC_CAN_SEND_SCANCODE since it never used. This fixes: Documentation/output/lirc.h.rst:6: WARNING: undefined label: lirc-can-send-scancode (if the link has no caption the label must precede a section header As this flag was added for kernel 4.16, let's remove it, while not too late. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 4fe580d36e41..f5bf06ecd87d 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -54,7 +54,6 @@ #define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) #define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) #define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_SCANCODE LIRC_MODE2SEND(LIRC_MODE_SCANCODE) #define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) #define LIRC_CAN_SEND_MASK 0x0000003f -- cgit v1.2.3 From e113d65ae417ae6d9be229649b81d404c47ade79 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 28 Feb 2018 05:47:07 -0500 Subject: media: tegra-cec: reset rx_buf_cnt when start bit detected If a start bit is detected, then reset the receive buffer counter to 0. This ensures that no stale data is in the buffer if a message is broken off midstream due to e.g. a Low Drive condition and then retransmitted. The only Rx interrupts we need to listen to are RX_REGISTER_FULL (i.e. a valid byte was received) and RX_START_BIT_DETECTED (i.e. a new message starts and we need to reset the counter). Signed-off-by: Hans Verkuil Cc: # for v4.15 and up Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/tegra-cec/tegra_cec.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/media/platform/tegra-cec/tegra_cec.c b/drivers/media/platform/tegra-cec/tegra_cec.c index 92f93a880015..aba488cd0e64 100644 --- a/drivers/media/platform/tegra-cec/tegra_cec.c +++ b/drivers/media/platform/tegra-cec/tegra_cec.c @@ -172,16 +172,13 @@ static irqreturn_t tegra_cec_irq_handler(int irq, void *data) } } - if (status & (TEGRA_CEC_INT_STAT_RX_REGISTER_OVERRUN | - TEGRA_CEC_INT_STAT_RX_BUS_ANOMALY_DETECTED | - TEGRA_CEC_INT_STAT_RX_START_BIT_DETECTED | - TEGRA_CEC_INT_STAT_RX_BUS_ERROR_DETECTED)) { + if (status & TEGRA_CEC_INT_STAT_RX_START_BIT_DETECTED) { cec_write(cec, TEGRA_CEC_INT_STAT, - (TEGRA_CEC_INT_STAT_RX_REGISTER_OVERRUN | - TEGRA_CEC_INT_STAT_RX_BUS_ANOMALY_DETECTED | - TEGRA_CEC_INT_STAT_RX_START_BIT_DETECTED | - TEGRA_CEC_INT_STAT_RX_BUS_ERROR_DETECTED)); - } else if (status & TEGRA_CEC_INT_STAT_RX_REGISTER_FULL) { + TEGRA_CEC_INT_STAT_RX_START_BIT_DETECTED); + cec->rx_done = false; + cec->rx_buf_cnt = 0; + } + if (status & TEGRA_CEC_INT_STAT_RX_REGISTER_FULL) { u32 v; cec_write(cec, TEGRA_CEC_INT_STAT, @@ -255,7 +252,7 @@ static int tegra_cec_adap_enable(struct cec_adapter *adap, bool enable) TEGRA_CEC_INT_MASK_TX_BUS_ANOMALY_DETECTED | TEGRA_CEC_INT_MASK_TX_FRAME_TRANSMITTED | TEGRA_CEC_INT_MASK_RX_REGISTER_FULL | - TEGRA_CEC_INT_MASK_RX_REGISTER_OVERRUN); + TEGRA_CEC_INT_MASK_RX_START_BIT_DETECTED); cec_write(cec, TEGRA_CEC_HW_CONTROL, TEGRA_CEC_HWCTRL_TX_RX_MODE); return 0; -- cgit v1.2.3 From 2c27476e398bfd9fa2572ff80a0de16f0becc900 Mon Sep 17 00:00:00 2001 From: Michael Ira Krufky Date: Tue, 16 Jan 2018 22:16:12 -0500 Subject: media: dvb: fix a Kconfig typo drivers/media/Kconfig: typo: replace `with` with `which` Signed-off-by: Michael Ira Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index 372c074bb1b9..86c1a190d946 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -151,7 +151,7 @@ config DVB_MMAP select VIDEOBUF2_VMALLOC default n help - This option enables DVB experimental memory-mapped API, with + This option enables DVB experimental memory-mapped API, which reduces the number of context switches to read DVB buffers, as the buffers can use mmap() syscalls. -- cgit v1.2.3 From 4e943a890cef42e90f43ce6be64728a290b97c55 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 1 Mar 2018 14:25:35 -0600 Subject: ARM: dts: rockchip: Add missing #sound-dai-cells on rk3288 dtc now gives the following warning: arch/arm/boot/dts/rk3288-tinker.dtb: Warning (sound_dai_property): /sound/simple-audio-card,codec: Missing property '#sound-dai-cells' in node /hdmi@ff980000 or bad phandle (referred from sound-dai[0]) Add the missing #sound-dai-cells property. Cc: Heiko Stuebner Cc: linux-rockchip@lists.infradead.org Signed-off-by: Rob Herring Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 6102e4e7f35c..354aff45c1af 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -927,6 +927,7 @@ i2s: i2s@ff890000 { compatible = "rockchip,rk3288-i2s", "rockchip,rk3066-i2s"; reg = <0x0 0xff890000 0x0 0x10000>; + #sound-dai-cells = <0>; interrupts = ; #address-cells = <1>; #size-cells = <0>; @@ -1176,6 +1177,7 @@ compatible = "rockchip,rk3288-dw-hdmi"; reg = <0x0 0xff980000 0x0 0x20000>; reg-io-width = <4>; + #sound-dai-cells = <0>; rockchip,grf = <&grf>; interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; -- cgit v1.2.3 From 745d0bd3af99ccc8c5f5822f808cd133eadad6ac Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2018 16:26:27 +0900 Subject: e1000e: Remove Other from EIAC It was reported that emulated e1000e devices in vmware esxi 6.5 Build 7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). Some tracing shows that after e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other() on emulated e1000e devices. In comparison, on real e1000e 82574 hardware, icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation. Some experimentation showed that this flaw in vmware e1000e emulation can be worked around by not setting Other in EIAC. This is how it was before 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1). Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 1298b69f990b..153ad406c65e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1918,6 +1918,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) bool enable = true; icr = er32(ICR); + ew32(ICR, E1000_ICR_OTHER); + if (icr & E1000_ICR_RXO) { ew32(ICR, E1000_ICR_RXO); enable = false; @@ -2040,7 +2042,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) hw->hw_addr + E1000_EITR_82574(vector)); else writel(1, hw->hw_addr + E1000_EITR_82574(vector)); - adapter->eiac_mask |= E1000_IMS_OTHER; /* Cause Tx interrupts on every write back */ ivar |= BIT(31); @@ -2265,7 +2266,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- cgit v1.2.3 From 1f0ea19722ef9dfa229a9540f70b8d1c34a98a6a Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:12 +0900 Subject: Partial revert "e1000e: Avoid receiver overrun interrupt bursts" This partially reverts commit 4aea7a5c5e940c1723add439f4088844cd26196d. We keep the fix for the first part of the problem (1) described in the log of that commit, that is to read ICR in the other interrupt handler. We remove the fix for the second part of the problem (2), Other interrupt throttling. Bursts of "Other" interrupts may once again occur during rxo (receive overflow) traffic conditions. This is deemed acceptable in the interest of avoiding unforeseen fallout from changes that are not strictly necessary. As discussed, the e1000e driver should be in "maintenance mode". Link: https://www.spinics.net/lists/netdev/msg480675.html Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 153ad406c65e..3b36efa6228d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1915,21 +1915,10 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 icr; - bool enable = true; icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); - if (icr & E1000_ICR_RXO) { - ew32(ICR, E1000_ICR_RXO); - enable = false; - /* napi poll will re-enable Other, make sure it runs */ - if (napi_schedule_prep(&adapter->napi)) { - adapter->total_rx_bytes = 0; - adapter->total_rx_packets = 0; - __napi_schedule(&adapter->napi); - } - } if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; @@ -1938,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (enable && !test_bit(__E1000_DOWN, &adapter->state)) + if (!test_bit(__E1000_DOWN, &adapter->state)) ew32(IMS, E1000_IMS_OTHER); return IRQ_HANDLED; @@ -2708,8 +2697,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight) napi_complete_done(napi, work_done); if (!test_bit(__E1000_DOWN, &adapter->state)) { if (adapter->msix_entries) - ew32(IMS, adapter->rx_ring->ims_val | - E1000_IMS_OTHER); + ew32(IMS, adapter->rx_ring->ims_val); else e1000_irq_enable(adapter); } -- cgit v1.2.3 From 361a954e6a7215de11a6179ad9bdc07d7e394b04 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:13 +0900 Subject: e1000e: Fix queue interrupt re-raising in Other interrupt Restores the ICS write for Rx/Tx queue interrupts which was present before commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1) but was not restored in commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). This re-raises the queue interrupts in case the txq or rxq bits were set in ICR and the Other interrupt handler read and cleared ICR before the queue interrupt was raised. Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 3b36efa6228d..2c9609bee2ae 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1919,6 +1919,9 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); + if (icr & adapter->eiac_mask) + ew32(ICS, (icr & adapter->eiac_mask)); + if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; -- cgit v1.2.3 From 116f4a640b3197401bc93b8adc6c35040308ceff Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:14 +0900 Subject: e1000e: Avoid missed interrupts following ICR read The 82574 specification update errata 12 states that interrupts may be missed if ICR is read while INT_ASSERTED is not set. Avoid that problem by setting all bits related to events that can trigger the Other interrupt in IMS. The Other interrupt is raised for such events regardless of whether or not they are set in IMS. However, only when they are set is the INT_ASSERTED bit also set in ICR. By doing this, we ensure that INT_ASSERTED is always set when we read ICR in e1000_msix_other() and steer clear of the errata. This also ensures that ICR will automatically be cleared on read, therefore we no longer need to clear bits explicitly. Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/defines.h | 21 ++++++++++++++++++++- drivers/net/ethernet/intel/e1000e/netdev.c | 11 ++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index afb7ebe20b24..824fd44e25f0 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -400,6 +400,10 @@ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_MDAC 0x00000200 /* MDIO Access Complete */ +#define E1000_ICR_SRPD 0x00010000 /* Small Receive Packet Detected */ +#define E1000_ICR_ACK 0x00020000 /* Receive ACK Frame Detected */ +#define E1000_ICR_MNG 0x00040000 /* Manageability Event Detected */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 @@ -407,7 +411,7 @@ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ #define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ #define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ -#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ +#define E1000_ICR_OTHER 0x01000000 /* Other Interrupt */ /* PBA ECC Register */ #define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ @@ -431,12 +435,27 @@ E1000_IMS_RXSEQ | \ E1000_IMS_LSC) +/* These are all of the events related to the OTHER interrupt. + */ +#define IMS_OTHER_MASK ( \ + E1000_IMS_LSC | \ + E1000_IMS_RXO | \ + E1000_IMS_MDAC | \ + E1000_IMS_SRPD | \ + E1000_IMS_ACK | \ + E1000_IMS_MNG) + /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* Receiver Overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO Access Complete */ +#define E1000_IMS_SRPD E1000_ICR_SRPD /* Small Receive Packet */ +#define E1000_IMS_ACK E1000_ICR_ACK /* Receive ACK Frame Detected */ +#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability Event */ #define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */ #define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2c9609bee2ae..9fd4050a91ca 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1914,16 +1914,12 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 icr; - - icr = er32(ICR); - ew32(ICR, E1000_ICR_OTHER); + u32 icr = er32(ICR); if (icr & adapter->eiac_mask) ew32(ICS, (icr & adapter->eiac_mask)); if (icr & E1000_ICR_LSC) { - ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -1931,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) } if (!test_bit(__E1000_DOWN, &adapter->state)) - ew32(IMS, E1000_IMS_OTHER); + ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK); return IRQ_HANDLED; } @@ -2258,7 +2254,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | + IMS_OTHER_MASK); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- cgit v1.2.3 From 4e7dc08e57c95673d2edaba8983c3de4dd1f65f5 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 20 Feb 2018 15:12:00 +0900 Subject: e1000e: Fix check_for_link return value with autoneg off When autoneg is off, the .check_for_link callback functions clear the get_link_status flag and systematically return a "pseudo-error". This means that the link is not detected as up until the next execution of the e1000_watchdog_task() 2 seconds later. Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Acked-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- drivers/net/ethernet/intel/e1000e/mac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 31277d3bb7dc..ff308b05d68c 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1602,7 +1602,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index f457c5703d0c..db735644b312 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -450,7 +450,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to -- cgit v1.2.3 From aea3fca005fb45f80869f2e8d56fd4e64c1d1fdb Mon Sep 17 00:00:00 2001 From: Pierre-Yves Kerbrat Date: Fri, 26 Jan 2018 11:24:12 +0100 Subject: e1000e: allocate ring descriptors with dma_zalloc_coherent Descriptor rings were not initialized at zero when allocated When area contained garbage data, it caused skb_over_panic in e1000_clean_rx_irq (if data had E1000_RXD_STAT_DD bit set) This patch makes use of dma_zalloc_coherent to make sure the ring is memset at 0 to prevent the area from containing garbage. Following is the signature of the panic: IODDR0@0.0: skbuff: skb_over_panic: text:80407b20 len:64010 put:64010 head:ab46d800 data:ab46d842 tail:0xab47d24c end:0xab46df40 dev:eth0 IODDR0@0.0: BUG: failure at net/core/skbuff.c:105/skb_panic()! IODDR0@0.0: Kernel panic - not syncing: BUG! IODDR0@0.0: IODDR0@0.0: Process swapper/0 (pid: 0, threadinfo=81728000, task=8173cc00 ,cpu: 0) IODDR0@0.0: SP = <815a1c0c> IODDR0@0.0: Stack: 00000001 IODDR0@0.0: b2d89800 815e33ac IODDR0@0.0: ea73c040 00000001 IODDR0@0.0: 60040003 0000fa0a IODDR0@0.0: 00000002 IODDR0@0.0: IODDR0@0.0: 804540c0 815a1c70 IODDR0@0.0: b2744000 602ac070 IODDR0@0.0: 815a1c44 b2d89800 IODDR0@0.0: 8173cc00 815a1c08 IODDR0@0.0: IODDR0@0.0: 00000006 IODDR0@0.0: 815a1b50 00000000 IODDR0@0.0: 80079434 00000001 IODDR0@0.0: ab46df40 b2744000 IODDR0@0.0: b2d89800 IODDR0@0.0: IODDR0@0.0: 0000fa0a 8045745c IODDR0@0.0: 815a1c88 0000fa0a IODDR0@0.0: 80407b20 b2789f80 IODDR0@0.0: 00000005 80407b20 IODDR0@0.0: IODDR0@0.0: IODDR0@0.0: Call Trace: IODDR0@0.0: [<804540bc>] skb_panic+0xa4/0xa8 IODDR0@0.0: [<80079430>] console_unlock+0x2f8/0x6d0 IODDR0@0.0: [<80457458>] skb_put+0xa0/0xc0 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<804079c8>] e1000_clean_rx_irq+0x188/0x3e8 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<80468b48>] __dev_kfree_skb_any+0x88/0xa8 IODDR0@0.0: [<804101ac>] e1000e_poll+0x94/0x288 IODDR0@0.0: [<8046e9d4>] net_rx_action+0x19c/0x4e8 IODDR0@0.0: ... IODDR0@0.0: Maximum depth to print reached. Use kstack= To specify a custom value (where 0 means to display the full backtrace) IODDR0@0.0: ---[ end Kernel panic - not syncing: BUG! Signed-off-by: Pierre-Yves Kerbrat Signed-off-by: Marius Gligor Tested-by: Aaron Brown Reviewed-by: Alexander Duyck Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 9fd4050a91ca..c0f23446bf26 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2323,8 +2323,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter, { struct pci_dev *pdev = adapter->pdev; - ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma, - GFP_KERNEL); + ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma, + GFP_KERNEL); if (!ring->desc) return -ENOMEM; -- cgit v1.2.3 From 0d3601d2994a4ad9edf0b44f2ea34c68ff494ea5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Mar 2018 16:07:38 +0100 Subject: netfilter: nft_set_hash: skip fixed hash if timeout is specified Fixed hash supports to timeouts, so skip it. Otherwise, userspace hits EOPNOTSUPP. Fixes: 6c03ae210ce3 ("netfilter: nft_set_hash: add non-resizable hashtable implementation") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3f1624ee056f..d40591fe1b2f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -674,7 +674,7 @@ static const struct nft_set_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, u32 flags) { - if (desc->size) { + if (desc->size && !(flags & NFT_SET_TIMEOUT)) { switch (desc->klen) { case 4: return &nft_hash_fast_ops; -- cgit v1.2.3 From a42ae5905140c324362fe5036ae1dbb16e4d359c Mon Sep 17 00:00:00 2001 From: Frank Mori Hess Date: Thu, 15 Feb 2018 15:13:42 -0500 Subject: staging: comedi: fix comedi_nsamples_left. A rounding error was causing comedi_nsamples_left to return the wrong value when nsamples was not a multiple of the scan length. Cc: # v4.4+ Signed-off-by: Frank Mori Hess Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index e618a87521a3..9d733471ca2e 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -475,8 +475,7 @@ unsigned int comedi_nsamples_left(struct comedi_subdevice *s, struct comedi_cmd *cmd = &async->cmd; if (cmd->stop_src == TRIG_COUNT) { - unsigned int nscans = nsamples / cmd->scan_end_arg; - unsigned int scans_left = __comedi_nscans_left(s, nscans); + unsigned int scans_left = __comedi_nscans_left(s, cmd->stop_arg); unsigned int scan_pos = comedi_bytes_to_samples(s, async->scan_progress); unsigned long long samples_left = 0; -- cgit v1.2.3 From 740a5759bf222332fbb5eda42f89aa25ba38f9b2 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 28 Feb 2018 14:59:22 +0800 Subject: staging: android: ashmem: Fix possible deadlock in ashmem_ioctl ashmem_mutex may create a chain of dependencies like: CPU0 CPU1 mmap syscall ioctl syscall -> mmap_sem (acquired) -> ashmem_ioctl -> ashmem_mmap -> ashmem_mutex (acquired) -> ashmem_mutex (try to acquire) -> copy_from_user -> mmap_sem (try to acquire) There is a lock odering problem between mmap_sem and ashmem_mutex causing a lockdep splat[1] during a syzcaller test. This patch fixes the problem by move copy_from_user out of ashmem_mutex. [1] https://www.spinics.net/lists/kernel/msg2733200.html Fixes: ce8a3a9e76d0 (staging: android: ashmem: Fix a race condition in pin ioctls) Reported-by: syzbot+d7a918a7a8e1c952bc36@syzkaller.appspotmail.com Signed-off-by: Yisheng Xie Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index d5450365769c..86580b6df33d 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -701,16 +701,14 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, size_t pgstart, pgend; int ret = -EINVAL; + if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) + return -EFAULT; + mutex_lock(&ashmem_mutex); if (unlikely(!asma->file)) goto out_unlock; - if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) { - ret = -EFAULT; - goto out_unlock; - } - /* per custom, you can pass zero for len to mean "everything onward" */ if (!pin.len) pin.len = PAGE_ALIGN(asma->size) - pin.offset; -- cgit v1.2.3 From cd526676de1ece03a8ea7cea726cf879a2dba2b8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:36 -0800 Subject: net: dsa: b53: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index db830a1141d9..63e02a54d537 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -814,8 +814,8 @@ void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) unsigned int i; for (i = 0; i < mib_size; i++) - memcpy(data + i * ETH_GSTRING_LEN, - mibs[i].name, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + mibs[i].name, ETH_GSTRING_LEN); } EXPORT_SYMBOL(b53_get_strings); -- cgit v1.2.3 From 98409b2bbca36b578be8e66758e6acb96e4c91da Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:37 -0800 Subject: net: phy: marvell: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: d2fa47d9dd5c ("phy: marvell: Add ethtool statistics counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 22d9bc9c33a4..0e0978d8a0eb 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1452,8 +1452,8 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data) int i; for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { - memcpy(data + i * ETH_GSTRING_LEN, - marvell_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + marvell_hw_stats[i].string, ETH_GSTRING_LEN); } } -- cgit v1.2.3 From 55f53567afe5f0cd2fd9e006b174c08c31c466f8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:38 -0800 Subject: net: phy: micrel: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 0f45310300f6..49be85afbea9 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -664,8 +664,8 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data) int i; for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) { - memcpy(data + i * ETH_GSTRING_LEN, - kszphy_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + kszphy_hw_stats[i].string, ETH_GSTRING_LEN); } } -- cgit v1.2.3 From 8a17eefa235f73b60c0ca7d397d2e4f66f85f413 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:39 -0800 Subject: net: phy: broadcom: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 820ee17b8d3b ("net: phy: broadcom: Add support code for reading PHY counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm-phy-lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 171010eb4d9c..5ad130c3da43 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -341,8 +341,8 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data) unsigned int i; for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++) - memcpy(data + i * ETH_GSTRING_LEN, - bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN); } EXPORT_SYMBOL_GPL(bcm_phy_get_strings); -- cgit v1.2.3 From 22b8d8115d1b940d548a5fddcbce4cdd30083cfc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 Mar 2018 16:34:22 -0800 Subject: ethernet: natsemi: correct spelling Correct spelling of National Semi-conductor (no hyphen) in drivers/net/ethernet/. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/Kconfig | 2 +- drivers/net/ethernet/natsemi/Kconfig | 6 +++--- drivers/net/ethernet/natsemi/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 29c3075bfb05..fdc673484add 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -3,7 +3,7 @@ # config NET_VENDOR_8390 - bool "National Semi-conductor 8390 devices" + bool "National Semiconductor 8390 devices" default y depends on NET_VENDOR_NATSEMI ---help--- diff --git a/drivers/net/ethernet/natsemi/Kconfig b/drivers/net/ethernet/natsemi/Kconfig index a10ef50e4f12..017fb2322589 100644 --- a/drivers/net/ethernet/natsemi/Kconfig +++ b/drivers/net/ethernet/natsemi/Kconfig @@ -1,16 +1,16 @@ # -# National Semi-conductor device configuration +# National Semiconductor device configuration # config NET_VENDOR_NATSEMI - bool "National Semi-conductor devices" + bool "National Semiconductor devices" default y ---help--- If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all - the questions about National Semi-conductor devices. If you say Y, + the questions about National Semiconductor devices. If you say Y, you will be asked for your specific card in the following questions. if NET_VENDOR_NATSEMI diff --git a/drivers/net/ethernet/natsemi/Makefile b/drivers/net/ethernet/natsemi/Makefile index cc664977596e..a759aa09ef59 100644 --- a/drivers/net/ethernet/natsemi/Makefile +++ b/drivers/net/ethernet/natsemi/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Makefile for the National Semi-conductor Sonic devices. +# Makefile for the National Semiconductor Sonic devices. # obj-$(CONFIG_MACSONIC) += macsonic.o -- cgit v1.2.3 From 9a513c905bb95bef79d96feb08621c1ec8d8c4bb Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Mar 2018 15:04:24 +0100 Subject: uas: fix comparison for error code A typo broke the comparison. Fixes: cbeef22fd611 ("usb: uas: unconditionally bring back host after reset") Signed-off-by: Oliver Neukum CC: stable@kernel.org Acked-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 3b1b9695177a..6034c39b67d1 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -1076,7 +1076,7 @@ static int uas_post_reset(struct usb_interface *intf) return 0; err = uas_configure_endpoints(devinfo); - if (err && err != ENODEV) + if (err && err != -ENODEV) shost_printk(KERN_ERR, shost, "%s: alloc streams error %d after reset", __func__, err); -- cgit v1.2.3 From cb88a0588717ba6c756cb5972d75766b273a6817 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 6 Mar 2018 09:38:49 +0100 Subject: usb: quirks: add control message delay for 1b1c:1b20 Corsair Strafe RGB keyboard does not respond to usb control messages sometimes and hence generates timeouts. Commit de3af5bf259d ("usb: quirks: add delay init quirk for Corsair Strafe RGB keyboard") tried to fix those timeouts by adding USB_QUIRK_DELAY_INIT. Unfortunately, even with this quirk timeouts of usb_control_msg() can still be seen, but with a lower frequency (approx. 1 out of 15): [ 29.103520] usb 1-8: string descriptor 0 read error: -110 [ 34.363097] usb 1-8: can't set config #1, error -110 Adding further delays to different locations where usb control messages are issued just moves the timeouts to other locations, e.g.: [ 35.400533] usbhid 1-8:1.0: can't add hid device: -110 [ 35.401014] usbhid: probe of 1-8:1.0 failed with error -110 The only way to reliably avoid those issues is having a pause after each usb control message. In approx. 200 boot cycles no more timeouts were seen. Addionaly, keep USB_QUIRK_DELAY_INIT as it turned out to be necessary to have the delay in hub_port_connect() after hub_port_init(). The overall boot time seems not to be influenced by these additional delays, even on fast machines and lightweight distributions. Fixes: de3af5bf259d ("usb: quirks: add delay init quirk for Corsair Strafe RGB keyboard") Cc: stable@vger.kernel.org Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 ++++ drivers/usb/core/quirks.c | 3 ++- include/linux/usb/quirks.h | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index c64cf6c4a83d..0c11d40a12bc 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -151,6 +151,10 @@ int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, ret = usb_internal_control_msg(dev, pipe, dr, data, size, timeout); + /* Linger a bit, prior to the next control message. */ + if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) + msleep(200); + kfree(dr); return ret; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f4a548471f0f..54b019e267c5 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -230,7 +230,8 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, /* Corsair Strafe RGB */ - { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index f1fcec2fd5f8..b7a99ce56bc9 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -63,4 +63,7 @@ */ #define USB_QUIRK_DISCONNECT_SUSPEND BIT(12) +/* Device needs a pause after every control message. */ +#define USB_QUIRK_DELAY_CTRL_MSG BIT(13) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From df6b074dbe248d8c43a82131e8fd429e401841a5 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Mon, 5 Mar 2018 11:35:10 -0600 Subject: usb: musb: call pm_runtime_{get,put}_sync before reading vbus registers Without pm_runtime_{get,put}_sync calls in place, reading vbus status via /sys causes the following error: Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0ab060 pgd = b333e822 [fa0ab060] *pgd=48011452(bad) [] (musb_default_readb) from [] (musb_vbus_show+0x58/0xe4) [] (musb_vbus_show) from [] (dev_attr_show+0x20/0x44) [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x80/0xdc) [] (sysfs_kf_seq_show) from [] (seq_read+0x250/0x448) [] (seq_read) from [] (__vfs_read+0x1c/0x118) [] (__vfs_read) from [] (vfs_read+0x90/0x144) [] (vfs_read) from [] (SyS_read+0x3c/0x74) [] (SyS_read) from [] (ret_fast_syscall+0x0/0x54) Solution was suggested by Tony Lindgren . Signed-off-by: Merlijn Wajer Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index eef4ad578b31..c344ef4e5355 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1756,6 +1756,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf) int vbus; u8 devctl; + pm_runtime_get_sync(dev); spin_lock_irqsave(&musb->lock, flags); val = musb->a_wait_bcon; vbus = musb_platform_get_vbus_status(musb); @@ -1769,6 +1770,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf) vbus = 0; } spin_unlock_irqrestore(&musb->lock, flags); + pm_runtime_put_sync(dev); return sprintf(buf, "Vbus %s, timeout %lu msec\n", vbus ? "on" : "off", val); -- cgit v1.2.3 From 6f566af3462897fc743e3af6ea8cc790a13148ba Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 6 Mar 2018 10:50:05 +0100 Subject: Revert "typec: tcpm: Only request matching pdos" Commit 57e6f0d7b804 ("typec: tcpm: Only request matching pdos") is causing a regression, before this commit e.g. the GPD win and GPD pocket devices were charging at 9V 3A with a PD charger, now they are instead slowly discharging at 5V 0.4A, as this commit causes the ports max_snk_mv/ma/mw settings to be completely ignored. Arguably the way to fix this would be to add a PDO_VAR() describing the voltage range to the snk_caps of boards which can handle any voltage in their range, but the "typec: tcpm: Only request matching pdos" commit looks at the type of PDO advertised by the source/charger and if that is fixed (as it typically is) only compairs against PDO_FIXED entries in the snk_caps so supporting a range of voltage would require adding a PDO_FIXED entry for *every possible* voltage to snk_caps. AFAICT there is no reason why a fixed source_cap cannot be matched against a variable snk_cap, so at a minimum the commit should be rewritten to support that. For now lets revert the "typec: tcpm: Only request matching pdos" commit, fixing the regression. Fixes: 57e6f0d7b804 ("typec: tcpm: Only request matching pdos") Cc: Badhri Jagan Sridharan Signed-off-by: Hans de Goede Acked-by: Heikki Krogerus Acked-by: Adam Thomson Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm.c | 163 ++++++++++++----------------------------------- 1 file changed, 42 insertions(+), 121 deletions(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index f4d563ee7690..8b637a4b474b 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -252,9 +252,6 @@ struct tcpm_port { unsigned int nr_src_pdo; u32 snk_pdo[PDO_MAX_OBJECTS]; unsigned int nr_snk_pdo; - unsigned int nr_fixed; /* number of fixed sink PDOs */ - unsigned int nr_var; /* number of variable sink PDOs */ - unsigned int nr_batt; /* number of battery sink PDOs */ u32 snk_vdo[VDO_MAX_OBJECTS]; unsigned int nr_snk_vdo; @@ -1770,90 +1767,39 @@ static int tcpm_pd_check_request(struct tcpm_port *port) return 0; } -#define min_power(x, y) min(pdo_max_power(x), pdo_max_power(y)) -#define min_current(x, y) min(pdo_max_current(x), pdo_max_current(y)) - -static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, - int *src_pdo) +static int tcpm_pd_select_pdo(struct tcpm_port *port) { - unsigned int i, j, max_mw = 0, max_mv = 0, mw = 0, mv = 0, ma = 0; + unsigned int i, max_mw = 0, max_mv = 0; int ret = -EINVAL; /* - * Select the source PDO providing the most power which has a - * matchig sink cap. + * Select the source PDO providing the most power while staying within + * the board's voltage limits. Prefer PDO providing exp */ for (i = 0; i < port->nr_source_caps; i++) { u32 pdo = port->source_caps[i]; enum pd_pdo_type type = pdo_type(pdo); + unsigned int mv, ma, mw; - if (type == PDO_TYPE_FIXED) { - for (j = 0; j < port->nr_fixed; j++) { - if (pdo_fixed_voltage(pdo) == - pdo_fixed_voltage(port->snk_pdo[j])) { - ma = min_current(pdo, port->snk_pdo[j]); - mv = pdo_fixed_voltage(pdo); - mw = ma * mv / 1000; - if (mw > max_mw || - (mw == max_mw && mv > max_mv)) { - ret = 0; - *src_pdo = i; - *sink_pdo = j; - max_mw = mw; - max_mv = mv; - } - /* There could only be one fixed pdo - * at a specific voltage level. - * So breaking here. - */ - break; - } - } - } else if (type == PDO_TYPE_BATT) { - for (j = port->nr_fixed; - j < port->nr_fixed + - port->nr_batt; - j++) { - if (pdo_min_voltage(pdo) >= - pdo_min_voltage(port->snk_pdo[j]) && - pdo_max_voltage(pdo) <= - pdo_max_voltage(port->snk_pdo[j])) { - mw = min_power(pdo, port->snk_pdo[j]); - mv = pdo_min_voltage(pdo); - if (mw > max_mw || - (mw == max_mw && mv > max_mv)) { - ret = 0; - *src_pdo = i; - *sink_pdo = j; - max_mw = mw; - max_mv = mv; - } - } - } - } else if (type == PDO_TYPE_VAR) { - for (j = port->nr_fixed + - port->nr_batt; - j < port->nr_fixed + - port->nr_batt + - port->nr_var; - j++) { - if (pdo_min_voltage(pdo) >= - pdo_min_voltage(port->snk_pdo[j]) && - pdo_max_voltage(pdo) <= - pdo_max_voltage(port->snk_pdo[j])) { - ma = min_current(pdo, port->snk_pdo[j]); - mv = pdo_min_voltage(pdo); - mw = ma * mv / 1000; - if (mw > max_mw || - (mw == max_mw && mv > max_mv)) { - ret = 0; - *src_pdo = i; - *sink_pdo = j; - max_mw = mw; - max_mv = mv; - } - } - } + if (type == PDO_TYPE_FIXED) + mv = pdo_fixed_voltage(pdo); + else + mv = pdo_min_voltage(pdo); + + if (type == PDO_TYPE_BATT) { + mw = pdo_max_power(pdo); + } else { + ma = min(pdo_max_current(pdo), + port->max_snk_ma); + mw = ma * mv / 1000; + } + + /* Perfer higher voltages if available */ + if ((mw > max_mw || (mw == max_mw && mv > max_mv)) && + mv <= port->max_snk_mv) { + ret = i; + max_mw = mw; + max_mv = mv; } } @@ -1865,14 +1811,13 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo) unsigned int mv, ma, mw, flags; unsigned int max_ma, max_mw; enum pd_pdo_type type; - int src_pdo_index, snk_pdo_index; - u32 pdo, matching_snk_pdo; + int index; + u32 pdo; - if (tcpm_pd_select_pdo(port, &snk_pdo_index, &src_pdo_index) < 0) + index = tcpm_pd_select_pdo(port); + if (index < 0) return -EINVAL; - - pdo = port->source_caps[src_pdo_index]; - matching_snk_pdo = port->snk_pdo[snk_pdo_index]; + pdo = port->source_caps[index]; type = pdo_type(pdo); if (type == PDO_TYPE_FIXED) @@ -1880,28 +1825,26 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo) else mv = pdo_min_voltage(pdo); - /* Select maximum available current within the sink pdo's limit */ + /* Select maximum available current within the board's power limit */ if (type == PDO_TYPE_BATT) { - mw = min_power(pdo, matching_snk_pdo); - ma = 1000 * mw / mv; + mw = pdo_max_power(pdo); + ma = 1000 * min(mw, port->max_snk_mw) / mv; } else { - ma = min_current(pdo, matching_snk_pdo); - mw = ma * mv / 1000; + ma = min(pdo_max_current(pdo), + 1000 * port->max_snk_mw / mv); } + ma = min(ma, port->max_snk_ma); flags = RDO_USB_COMM | RDO_NO_SUSPEND; /* Set mismatch bit if offered power is less than operating power */ + mw = ma * mv / 1000; max_ma = ma; max_mw = mw; if (mw < port->operating_snk_mw) { flags |= RDO_CAP_MISMATCH; - if (type == PDO_TYPE_BATT && - (pdo_max_power(matching_snk_pdo) > pdo_max_power(pdo))) - max_mw = pdo_max_power(matching_snk_pdo); - else if (pdo_max_current(matching_snk_pdo) > - pdo_max_current(pdo)) - max_ma = pdo_max_current(matching_snk_pdo); + max_mw = port->operating_snk_mw; + max_ma = max_mw * 1000 / mv; } tcpm_log(port, "cc=%d cc1=%d cc2=%d vbus=%d vconn=%s polarity=%d", @@ -1910,16 +1853,16 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo) port->polarity); if (type == PDO_TYPE_BATT) { - *rdo = RDO_BATT(src_pdo_index + 1, mw, max_mw, flags); + *rdo = RDO_BATT(index + 1, mw, max_mw, flags); tcpm_log(port, "Requesting PDO %d: %u mV, %u mW%s", - src_pdo_index, mv, mw, + index, mv, mw, flags & RDO_CAP_MISMATCH ? " [mismatch]" : ""); } else { - *rdo = RDO_FIXED(src_pdo_index + 1, ma, max_ma, flags); + *rdo = RDO_FIXED(index + 1, ma, max_ma, flags); tcpm_log(port, "Requesting PDO %d: %u mV, %u mA%s", - src_pdo_index, mv, ma, + index, mv, ma, flags & RDO_CAP_MISMATCH ? " [mismatch]" : ""); } @@ -3650,19 +3593,6 @@ int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo, } EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities); -static int nr_type_pdos(const u32 *pdo, unsigned int nr_pdo, - enum pd_pdo_type type) -{ - int count = 0; - int i; - - for (i = 0; i < nr_pdo; i++) { - if (pdo_type(pdo[i]) == type) - count++; - } - return count; -} - struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) { struct tcpm_port *port; @@ -3708,15 +3638,6 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) tcpc->config->nr_src_pdo); port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo, tcpc->config->nr_snk_pdo); - port->nr_fixed = nr_type_pdos(port->snk_pdo, - port->nr_snk_pdo, - PDO_TYPE_FIXED); - port->nr_var = nr_type_pdos(port->snk_pdo, - port->nr_snk_pdo, - PDO_TYPE_VAR); - port->nr_batt = nr_type_pdos(port->snk_pdo, - port->nr_snk_pdo, - PDO_TYPE_BATT); port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo, tcpc->config->nr_snk_vdo); -- cgit v1.2.3 From 655296c8bbeffcf020558c4455305d597a73bde1 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Mar 2018 22:24:08 -0700 Subject: Drivers: hv: vmbus: Fix ring buffer signaling Fix bugs in signaling the Hyper-V host when freeing space in the host->guest ring buffer: 1. The interrupt_mask must not be used to determine whether to signal on the host->guest ring buffer 2. The ring buffer write_index must be read (via hv_get_bytes_to_write) *after* pending_send_sz is read in order to avoid a race condition 3. Comparisons with pending_send_sz must treat the "equals" case as not-enough-space 4. Don't signal if the pending_send_sz feature is not present. Older versions of Hyper-V that don't implement this feature will poll. Fixes: 03bad714a161 ("vmbus: more host signalling avoidance") Cc: Stable # 4.14 and above Signed-off-by: Michael Kelley Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 52 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 50e071444a5c..8699bb969e7e 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -417,13 +417,24 @@ __hv_pkt_iter_next(struct vmbus_channel *channel, } EXPORT_SYMBOL_GPL(__hv_pkt_iter_next); +/* How many bytes were read in this iterator cycle */ +static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi, + u32 start_read_index) +{ + if (rbi->priv_read_index >= start_read_index) + return rbi->priv_read_index - start_read_index; + else + return rbi->ring_datasize - start_read_index + + rbi->priv_read_index; +} + /* * Update host ring buffer after iterating over packets. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { struct hv_ring_buffer_info *rbi = &channel->inbound; - u32 orig_write_sz = hv_get_bytes_to_write(rbi); + u32 curr_write_sz, pending_sz, bytes_read, start_read_index; /* * Make sure all reads are done before we update the read index since @@ -431,8 +442,12 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) * is updated. */ virt_rmb(); + start_read_index = rbi->ring_buffer->read_index; rbi->ring_buffer->read_index = rbi->priv_read_index; + if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz) + return; + /* * Issue a full memory barrier before making the signaling decision. * Here is the reason for having this barrier: @@ -446,26 +461,29 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) */ virt_mb(); - /* If host has disabled notifications then skip */ - if (rbi->ring_buffer->interrupt_mask) + pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); + if (!pending_sz) return; - if (rbi->ring_buffer->feature_bits.feat_pending_send_sz) { - u32 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); + /* + * Ensure the read of write_index in hv_get_bytes_to_write() + * happens after the read of pending_send_sz. + */ + virt_rmb(); + curr_write_sz = hv_get_bytes_to_write(rbi); + bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index); - /* - * If there was space before we began iteration, - * then host was not blocked. Also handles case where - * pending_sz is zero then host has nothing pending - * and does not need to be signaled. - */ - if (orig_write_sz > pending_sz) - return; + /* + * If there was space before we began iteration, + * then host was not blocked. + */ - /* If pending write will not fit, don't give false hope. */ - if (hv_get_bytes_to_write(rbi) < pending_sz) - return; - } + if (curr_write_sz - bytes_read > pending_sz) + return; + + /* If pending write will not fit, don't give false hope. */ + if (curr_write_sz <= pending_sz) + return; vmbus_setevent(channel); } -- cgit v1.2.3 From 167f5594b5efa20a26ff03b3424f793887e6b448 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 6 Mar 2018 15:56:47 +1100 Subject: kernel/memremap: Remove stale devres_free() call devm_memremap_pages() was re-worked in e8d513483300 "memremap: change devm_memremap_pages interface to use struct dev_pagemap" to take a caller allocated struct dev_pagemap as a function parameter. A call to devres_free() was left in the error cleanup path which results in a kernel panic if the remap fails for some reason. Remove it to fix the panic and let devm_memremap_pages() fail gracefully. Fixes: e8d513483300 ("memremap: change devm_memremap_pages interface...") Signed-off-by: Oliver O'Halloran Reviewed-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- kernel/memremap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 4dd4274cabe2..895e6b76b25e 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -427,7 +427,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) err_pfn_remap: err_radix: pgmap_radix_release(res, pgoff); - devres_free(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); -- cgit v1.2.3 From 2cbb4ea7de167b02ffa63e9cdfdb07a7e7094615 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 16 Feb 2018 11:03:03 -0800 Subject: net: Only honor ifindex in IP_PKTINFO if non-0 Only allow ifindex from IP_PKTINFO to override SO_BINDTODEVICE settings if the index is actually set in the message. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9c41a0cef1a5..74c962b9b09c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) return -EINVAL; - ipc->oif = src_info->ipi6_ifindex; + if (src_info->ipi6_ifindex) + ipc->oif = src_info->ipi6_ifindex; ipc->addr = src_info->ipi6_addr.s6_addr32[3]; continue; } @@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; info = (struct in_pktinfo *)CMSG_DATA(cmsg); - ipc->oif = info->ipi_ifindex; + if (info->ipi_ifindex) + ipc->oif = info->ipi_ifindex; ipc->addr = info->ipi_spec_dst.s_addr; break; } -- cgit v1.2.3 From 20d5de51e7052ae3fb645d8c5c584ee7383b2a93 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 6 Mar 2018 14:50:10 +0100 Subject: tools: bpftool: fix compilation with older headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compilation of bpftool on a distro that lacks eBPF support in the installed kernel headers fails with: common.c: In function ‘is_bpffs’: common.c:96:40: error: ‘BPF_FS_MAGIC’ undeclared (first use in this function) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; ^ Fix this the same way it is already in tools/lib/bpf/libbpf.c and tools/lib/api/fs/fs.c. Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 0b482c0070e0..465995281dcd 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -55,6 +55,10 @@ #include "main.h" +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + void p_err(const char *fmt, ...) { va_list ap; -- cgit v1.2.3 From 864449eea7c600596e305ffdc4a6a846414b222c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 26 Feb 2018 15:26:01 +0100 Subject: scsi: mpt3sas: Do not mark fw_event workqueue as WQ_MEM_RECLAIM The firmware event workqueue should not be marked as WQ_MEM_RECLAIM as it's doesn't need to make forward progress under memory pressure. In the current state it will result in a deadlock if the device had been forcefully removed. Cc: Sreekanth Reddy Cc: Suganath Prabu Subramani Acked-by: Sreekanth Reddy Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index c2ea13c7e37e..a1cb0236c550 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10558,7 +10558,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name), "fw_event_%s%d", ioc->driver_name, ioc->id); ioc->firmware_event_thread = alloc_ordered_workqueue( - ioc->firmware_event_name, WQ_MEM_RECLAIM); + ioc->firmware_event_name, 0); if (!ioc->firmware_event_thread) { pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); -- cgit v1.2.3 From 4b433924b2755a94f99258c178684a0e05c344de Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 2 Mar 2018 07:19:28 +0900 Subject: scsi: sd_zbc: Fix potential memory leak Rework sd_zbc_check_zone_size() to avoid a memory leak due to an early return if sd_zbc_report_zones() fails. Reported-by: David.butterfield Signed-off-by: Damien Le Moal Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 27793b9f54c0..9049a189c8e5 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -486,7 +486,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) */ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) { - u64 zone_blocks; + u64 zone_blocks = 0; sector_t block = 0; unsigned char *buf; unsigned char *rec; @@ -504,10 +504,8 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /* Do a report zone to get the same field */ ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0); - if (ret) { - zone_blocks = 0; - goto out; - } + if (ret) + goto out_free; same = buf[4] & 0x0f; if (same > 0) { @@ -547,7 +545,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, block); if (ret) - return ret; + goto out_free; } } while (block < sdkp->capacity); @@ -555,35 +553,32 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) zone_blocks = sdkp->zone_blocks; out: - kfree(buf); - if (!zone_blocks) { if (sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "Devices with non constant zone " "size are not supported\n"); - return -ENODEV; - } - - if (!is_power_of_2(zone_blocks)) { + ret = -ENODEV; + } else if (!is_power_of_2(zone_blocks)) { if (sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "Devices with non power of 2 zone " "size are not supported\n"); - return -ENODEV; - } - - if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) { + ret = -ENODEV; + } else if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) { if (sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "Zone size too large\n"); - return -ENODEV; + ret = -ENODEV; + } else { + sdkp->zone_blocks = zone_blocks; + sdkp->zone_shift = ilog2(zone_blocks); } - sdkp->zone_blocks = zone_blocks; - sdkp->zone_shift = ilog2(zone_blocks); +out_free: + kfree(buf); - return 0; + return ret; } static int sd_zbc_setup(struct scsi_disk *sdkp) -- cgit v1.2.3 From 6a2cf8d3663e13e19af636c2a8d92e766261dc45 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Mon, 5 Mar 2018 00:02:55 -0500 Subject: scsi: qla2xxx: Fix crashes in qla2x00_probe_one on probe failure Because of the shifting around of code in qla2x00_probe_one recently, failures during adapter initialization can lead to problems, i.e. NULL pointer crashes and doubly freed data structures which cause eventual panics. This V2 version makes the relevant memory free routines idempotent, so repeat calls won't cause any harm. I also removed the problematic probe_init_failed exit point as it is not needed. Fixes: d64d6c5671db ("scsi: qla2xxx: Fix NULL pointer crash due to probe failure") Signed-off-by: Bill Kuzeja Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 59 +++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 285911e81728..5c5dcca4d1da 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req, ha->req_q_map[0] = req; set_bit(0, ha->rsp_qid_map); set_bit(0, ha->req_qid_map); - return 1; + return 0; fail_qpair_map: kfree(ha->base_qpair); @@ -471,6 +471,9 @@ fail_req_map: static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req) { + if (!ha->req_q_map) + return; + if (IS_QLAFX00(ha)) { if (req && req->ring_fx00) dma_free_coherent(&ha->pdev->dev, @@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req) (req->length + 1) * sizeof(request_t), req->ring, req->dma); - if (req) + if (req) { kfree(req->outstanding_cmds); - - kfree(req); + kfree(req); + } } static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp) { + if (!ha->rsp_q_map) + return; + if (IS_QLAFX00(ha)) { if (rsp && rsp->ring) dma_free_coherent(&ha->pdev->dev, @@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp) (rsp->length + 1) * sizeof(response_t), rsp->ring, rsp->dma); } - kfree(rsp); + if (rsp) + kfree(rsp); } static void qla2x00_free_queues(struct qla_hw_data *ha) @@ -1723,6 +1730,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) struct qla_tgt_cmd *cmd; uint8_t trace = 0; + if (!ha->req_q_map) + return; spin_lock_irqsave(qp->qp_lock_ptr, flags); req = qp->req; for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { @@ -3095,14 +3104,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); if (ret) - goto probe_hw_failed; + goto probe_failed; /* Alloc arrays of request and response ring ptrs */ - if (!qla2x00_alloc_queues(ha, req, rsp)) { + if (qla2x00_alloc_queues(ha, req, rsp)) { ql_log(ql_log_fatal, base_vha, 0x003d, "Failed to allocate memory for queue pointers..." "aborting.\n"); - goto probe_init_failed; + goto probe_failed; } if (ha->mqenable && shost_use_blk_mq(host)) { @@ -3387,15 +3396,6 @@ skip_dpc: return 0; -probe_init_failed: - qla2x00_free_req_que(ha, req); - ha->req_q_map[0] = NULL; - clear_bit(0, ha->req_qid_map); - qla2x00_free_rsp_que(ha, rsp); - ha->rsp_q_map[0] = NULL; - clear_bit(0, ha->rsp_qid_map); - ha->max_req_queues = ha->max_rsp_queues = 0; - probe_failed: if (base_vha->timer_active) qla2x00_stop_timer(base_vha); @@ -4508,11 +4508,17 @@ qla2x00_mem_free(struct qla_hw_data *ha) if (ha->init_cb) dma_free_coherent(&ha->pdev->dev, ha->init_cb_size, ha->init_cb, ha->init_cb_dma); - vfree(ha->optrom_buffer); - kfree(ha->nvram); - kfree(ha->npiv_info); - kfree(ha->swl); - kfree(ha->loop_id_map); + + if (ha->optrom_buffer) + vfree(ha->optrom_buffer); + if (ha->nvram) + kfree(ha->nvram); + if (ha->npiv_info) + kfree(ha->npiv_info); + if (ha->swl) + kfree(ha->swl); + if (ha->loop_id_map) + kfree(ha->loop_id_map); ha->srb_mempool = NULL; ha->ctx_mempool = NULL; @@ -4528,6 +4534,15 @@ qla2x00_mem_free(struct qla_hw_data *ha) ha->ex_init_cb_dma = 0; ha->async_pd = NULL; ha->async_pd_dma = 0; + ha->loop_id_map = NULL; + ha->npiv_info = NULL; + ha->optrom_buffer = NULL; + ha->swl = NULL; + ha->nvram = NULL; + ha->mctp_dump = NULL; + ha->dcbx_tlv = NULL; + ha->xgmac_data = NULL; + ha->sfp_data = NULL; ha->s_dma_pool = NULL; ha->dl_dma_pool = NULL; -- cgit v1.2.3 From 20bd1d026aacc5399464f8328f305985c493cde3 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 6 Mar 2018 21:47:32 +0000 Subject: scsi: sd: Keep disk read-only when re-reading partition If the read-only flag is true on a SCSI disk, re-reading the partition table sets the flag back to false. To observe this bug, you can run: 1. blockdev --setro /dev/sda 2. blockdev --rereadpt /dev/sda 3. blockdev --getro /dev/sda This commit reads the disk's old state and combines it with the device disk-reported state rather than unconditionally marking it as RW. Reported-by: Li Ning Signed-off-by: Jeremy Cline Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ab75ebd518a7..3b45f7fc5620 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2624,6 +2624,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; + int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot; set_disk_ro(sdkp->disk, 0); @@ -2664,7 +2665,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); - set_disk_ro(sdkp->disk, sdkp->write_prot); + set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); -- cgit v1.2.3 From 87cdf3148b11d46382dbce2754ae7036aba96380 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Sun, 4 Mar 2018 21:03:52 +0200 Subject: xfrm: Verify MAC header exists before overwriting eth_hdr(skb)->h_proto Artem Savkov reported that commit 5efec5c655dd leads to a packet loss under IPSec configuration. It appears that his setup consists of a TUN device, which does not have a MAC header. Make sure MAC header exists. Note: TUN device sets a MAC header pointer, although it does not have one. Fixes: 5efec5c655dd ("xfrm: Fix eth_hdr(skb)->h_proto to reflect inner IP version") Reported-by: Artem Savkov Tested-by: Artem Savkov Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_mode_tunnel.c | 3 ++- net/ipv6/xfrm6_mode_tunnel.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 63faeee989a9..2a9764bd1719 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index bb935a3b7fea..de1b0b8c53b0 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; -- cgit v1.2.3 From 933897342d0714ae1c10729cbaeecea0c6178db5 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 28 Feb 2018 21:15:19 +0100 Subject: brcmfmac: add possibility to obtain firmware error The feature module needs to evaluate the actual firmware error return upon a control command. This adds a flag to struct brcmf_if that the caller can set. This flag is checked to determine the error code that needs to be returned. Fixes: b69c1df47281 ("brcmfmac: separate firmware errors from i/o errors") Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h | 2 ++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 10 ++++++++++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c | 3 +++ 3 files changed, 15 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index df8a1ecb9924..232dcbb83311 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -181,6 +181,7 @@ enum brcmf_netif_stop_reason { * @netif_stop_lock: spinlock for update netif_stop from multiple sources. * @pend_8021x_cnt: tracks outstanding number of 802.1x frames. * @pend_8021x_wait: used for signalling change in count. + * @fwil_fwerr: flag indicating fwil layer should return firmware error codes. */ struct brcmf_if { struct brcmf_pub *drvr; @@ -198,6 +199,7 @@ struct brcmf_if { wait_queue_head_t pend_8021x_wait; struct in6_addr ipv6_addr_tbl[NDOL_MAX_ENTRIES]; u8 ipv6addr_idx; + bool fwil_fwerr; }; int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index 47de35a33853..bede7b7fd996 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -104,6 +104,9 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, u32 data; int err; + /* we need to know firmware error */ + ifp->fwil_fwerr = true; + err = brcmf_fil_iovar_int_get(ifp, name, &data); if (err == 0) { brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]); @@ -112,6 +115,8 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, brcmf_dbg(TRACE, "%s feature check failed: %d\n", brcmf_feat_names[id], err); } + + ifp->fwil_fwerr = false; } static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, @@ -120,6 +125,9 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, { int err; + /* we need to know firmware error */ + ifp->fwil_fwerr = true; + err = brcmf_fil_iovar_data_set(ifp, name, data, len); if (err != -BRCMF_FW_UNSUPPORTED) { brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]); @@ -128,6 +136,8 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, brcmf_dbg(TRACE, "%s feature check failed: %d\n", brcmf_feat_names[id], err); } + + ifp->fwil_fwerr = false; } #define MAX_CAPS_BUFFER_SIZE 512 diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c index f2cfdd3b2bf1..fc5751116d99 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c @@ -131,6 +131,9 @@ brcmf_fil_cmd_data(struct brcmf_if *ifp, u32 cmd, void *data, u32 len, bool set) brcmf_fil_get_errstr((u32)(-fwerr)), fwerr); err = -EBADE; } + if (ifp->fwil_fwerr) + return fwerr; + return err; } -- cgit v1.2.3 From 455f3e76cfc0d893585a5f358b9ddbe9c1e1e53b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 28 Feb 2018 21:15:20 +0100 Subject: brcmfmac: fix P2P_DEVICE ethernet address generation The firmware has a requirement that the P2P_DEVICE address should be different from the address of the primary interface. When not specified by user-space, the driver generates the MAC address for the P2P_DEVICE interface using the MAC address of the primary interface and setting the locally administered bit. However, the MAC address of the primary interface may already have that bit set causing the creation of the P2P_DEVICE interface to fail with -EBUSY. Fix this by using a random address instead to determine the P2P_DEVICE address. Cc: stable@vger.kernel.org # 3.10.y Reported-by: Hans de Goede Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 2ee54133efa1..82064e909784 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -462,25 +462,23 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac) * @dev_addr: optional device address. * * P2P needs mac addresses for P2P device and interface. If no device - * address it specified, these are derived from the primary net device, ie. - * the permanent ethernet address of the device. + * address it specified, these are derived from a random ethernet + * address. */ static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr) { - struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp; - bool local_admin = false; + bool random_addr = false; - if (!dev_addr || is_zero_ether_addr(dev_addr)) { - dev_addr = pri_ifp->mac_addr; - local_admin = true; - } + if (!dev_addr || is_zero_ether_addr(dev_addr)) + random_addr = true; - /* Generate the P2P Device Address. This consists of the device's - * primary MAC address with the locally administered bit set. + /* Generate the P2P Device Address obtaining a random ethernet + * address with the locally administered bit set. */ - memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); - if (local_admin) - p2p->dev_addr[0] |= 0x02; + if (random_addr) + eth_random_addr(p2p->dev_addr); + else + memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); /* Generate the P2P Interface Address. If the discovery and connection * BSSCFGs need to simultaneously co-exist, then this address must be -- cgit v1.2.3 From 25b5cdfcce1b57971840505dfc78556bd12dea6d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Mar 2018 16:01:48 +0100 Subject: dt-bindings: net: renesas-ravb: Make stream buffer optional The Stream Buffer for EtherAVB-IF (STBE) is an optional component, and is not present on all SoCs. Document this in the DT bindings, including a list of SoCs that do have it. Fixes: 785ec87483d1e24a ("ravb: document R8A77970 bindings") Fixes: f231c4178a655b09 ("dt-bindings: net: renesas-ravb: Add support for R8A77995 RAVB") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Acked-by: Sergei Shtylyov Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 92fd4b2f17b2..b4dc455eb155 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -27,7 +27,11 @@ Required properties: SoC-specific version corresponding to the platform first followed by the generic version. -- reg: offset and length of (1) the register block and (2) the stream buffer. +- reg: Offset and length of (1) the register block and (2) the stream buffer. + The region for the register block is mandatory. + The region for the stream buffer is optional, as it is only present on + R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A7796), + and M3-N (R8A77965). - interrupts: A list of interrupt-specifiers, one for each entry in interrupt-names. If interrupt-names is not present, an interrupt specifier -- cgit v1.2.3 From dbe8a9dfd7eafe2832f80e6e29e5b1575576c987 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Mar 2018 14:20:33 +0100 Subject: ARM: ux500: Fix PMU IRQ regression Commit 2b05f6ae1ee5 ("ARM: ux500: remove PMU IRQ bouncer") deleted some code to bounce and work around the weird PMU IRQs in the DB8500 ASIC, but did a semantic mistake: since the auxdata was now unused, the call to of_platform_populate() was removed, but this does not work: the default platform population will only kick in if .init_machine() is assigned NULL, and since the U8540 was still using the callback that was not the case. Fix this by reinstating the call to of_platform_populate(), but pass NULL as auxdata. Cc: Mark Rutland Cc: Will Deacon Fixes: 2b05f6ae1ee5 ("ARM: ux500: remove PMU IRQ bouncer") Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/mach-ux500/cpu-db8500.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 7e5d7a083707..36cd23c8be9b 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,9 @@ static void __init u8500_init_machine(void) if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, u8540_auxdata_lookup, NULL); + else + of_platform_populate(NULL, u8500_local_bus_nodes, + NULL, NULL); } static const char * stericsson_dt_platform_compat[] = { -- cgit v1.2.3 From d3dcf8eb615537526bd42ff27a081d46d337816e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:48 +0200 Subject: rhashtable: Fix rhlist duplicates insertion When inserting duplicate objects (those with the same key), current rhlist implementation messes up the chain pointers by updating the bucket pointer instead of prev next pointer to the newly inserted node. This causes missing elements on removal and travesal. Fix that by properly updating pprev pointer to point to the correct rhash_head next pointer. Issue: 1241076 Change-Id: I86b2c140bcb4aeb10b70a72a267ff590bb2b17e7 Fixes: ca26893f05e8 ('rhashtable: Add rhlist interface') Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- lib/rhashtable.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c9df2527e0cd..668a21f04b09 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -766,8 +766,10 @@ slow_path: if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } data = rht_obj(ht, head); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3825c30aaa36..47de025b6245 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, if (!key || (ht->p.obj_cmpfn ? ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } if (!ht->rhlist) return rht_obj(ht, head); -- cgit v1.2.3 From 499ac3b60f657dae82055fc81c7b01e6242ac9bc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:49 +0200 Subject: test_rhashtable: add test case for rhltable with duplicate objects Tries to insert duplicates in the middle of bucket's chain: bucket 1: [[val 21 (tid=1)]] -> [[ val 1 (tid=2), val 1 (tid=0) ]] Reuses tid to distinguish the elements insertion order. Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 76d3667fdea2..f4000c137dbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -79,6 +79,21 @@ struct thread_data { struct test_obj *objs; }; +static u32 my_hashfn(const void *data, u32 len, u32 seed) +{ + const struct test_obj_rhl *obj = data; + + return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; +} + +static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct test_obj_rhl *test_obj = obj; + const struct test_obj_val *val = arg->key; + + return test_obj->value.id - val->id; +} + static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = { .nulls_base = (3U << RHT_BASE_SHIFT), }; +static struct rhashtable_params test_rht_params_dup = { + .head_offset = offsetof(struct test_obj_rhl, list_node), + .key_offset = offsetof(struct test_obj_rhl, value), + .key_len = sizeof(struct test_obj_val), + .hashfn = jhash, + .obj_hashfn = my_hashfn, + .obj_cmpfn = my_cmpfn, + .nelem_hint = 128, + .automatic_shrinking = false, +}; + static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); @@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array, return err; } +static unsigned int __init print_ht(struct rhltable *rhlt) +{ + struct rhashtable *ht; + const struct bucket_table *tbl; + char buff[512] = ""; + unsigned int i, cnt = 0; + + ht = &rhlt->ht; + tbl = rht_dereference(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + struct test_obj_rhl *p; + + pos = rht_dereference(tbl->buckets[i], ht); + next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; + + if (!rht_is_a_nulls(pos)) { + sprintf(buff, "%s\nbucket[%d] -> ", buff, i); + } + + while (!rht_is_a_nulls(pos)) { + struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead); + sprintf(buff, "%s[[", buff); + do { + pos = &list->rhead; + list = rht_dereference(list->next, ht); + p = rht_obj(ht, pos); + + sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid, + list? ", " : " "); + cnt++; + } while (list); + + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + + sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : ""); + } + } + printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + + return cnt; +} + +static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, + int cnt, bool slow) +{ + struct rhltable rhlt; + unsigned int i, ret; + const char *key; + int err = 0; + + err = rhltable_init(&rhlt, &test_rht_params_dup); + if (WARN_ON(err)) + return err; + + for (i = 0; i < cnt; i++) { + rhl_test_objects[i].value.tid = i; + key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key += test_rht_params_dup.key_offset; + + if (slow) { + err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + &rhl_test_objects[i].list_node.rhead)); + if (err == -EAGAIN) + err = 0; + } else + err = rhltable_insert(&rhlt, + &rhl_test_objects[i].list_node, + test_rht_params_dup); + if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) + goto skip_print; + } + + ret = print_ht(&rhlt); + WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); + +skip_print: + rhltable_destroy(&rhlt); + + return 0; +} + +static int __init test_insert_duplicates_run(void) +{ + struct test_obj_rhl rhl_test_objects[3] = {}; + + pr_info("test inserting duplicates\n"); + + /* two different values that map to same bucket */ + rhl_test_objects[0].value.id = 1; + rhl_test_objects[1].value.id = 21; + + /* and another duplicate with same as [0] value + * which will be second on the bucket list */ + rhl_test_objects[2].value.id = rhl_test_objects[0].value.id; + + test_insert_dup(rhl_test_objects, 2, false); + test_insert_dup(rhl_test_objects, 3, false); + test_insert_dup(rhl_test_objects, 2, true); + test_insert_dup(rhl_test_objects, 3, true); + + return 0; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -613,6 +745,8 @@ static int __init test_rht_init(void) do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); + test_insert_duplicates_run(); + if (!tcount) return 0; -- cgit v1.2.3 From 803fafbe0cd522fa6b9e41ca3b96cfb2e2a2222d Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Sun, 4 Mar 2018 21:48:17 +0300 Subject: fsl/fman: avoid sleeping in atomic context while adding an address __dev_mc_add grabs an adress spinlock so use atomic context in kmalloc. / # ifconfig eth0 inet 192.168.0.111 [ 89.331622] BUG: sleeping function called from invalid context at mm/slab.h:420 [ 89.339002] in_atomic(): 1, irqs_disabled(): 0, pid: 1035, name: ifconfig [ 89.345799] 2 locks held by ifconfig/1035: [ 89.349908] #0: (rtnl_mutex){+.+.}, at: [<(ptrval)>] devinet_ioctl+0xc0/0x8a0 [ 89.357258] #1: (_xmit_ETHER){+...}, at: [<(ptrval)>] __dev_mc_add+0x28/0x80 [ 89.364520] CPU: 1 PID: 1035 Comm: ifconfig Not tainted 4.16.0-rc3-dirty #8 [ 89.371464] Call Trace: [ 89.373908] [e959db60] [c066f948] dump_stack+0xa4/0xfc (unreliable) [ 89.380177] [e959db80] [c00671d8] ___might_sleep+0x248/0x280 [ 89.385833] [e959dba0] [c01aec34] kmem_cache_alloc_trace+0x174/0x320 [ 89.392179] [e959dbd0] [c04ab920] dtsec_add_hash_mac_address+0x130/0x240 [ 89.398874] [e959dc00] [c04a9d74] set_multi+0x174/0x1b0 [ 89.404093] [e959dc30] [c04afb68] dpaa_set_rx_mode+0x68/0xe0 [ 89.409745] [e959dc40] [c057baf8] __dev_mc_add+0x58/0x80 [ 89.415052] [e959dc60] [c060fd64] igmp_group_added+0x164/0x190 [ 89.420878] [e959dca0] [c060ffa8] ip_mc_inc_group+0x218/0x460 [ 89.426617] [e959dce0] [c06120fc] ip_mc_up+0x3c/0x190 [ 89.431662] [e959dd10] [c0607270] inetdev_event+0x250/0x620 [ 89.437227] [e959dd50] [c005f190] notifier_call_chain+0x80/0xf0 [ 89.443138] [e959dd80] [c0573a74] __dev_notify_flags+0x54/0xf0 [ 89.448964] [e959dda0] [c05743f8] dev_change_flags+0x48/0x60 [ 89.454615] [e959ddc0] [c0606744] devinet_ioctl+0x544/0x8a0 [ 89.460180] [e959de10] [c060987c] inet_ioctl+0x9c/0x1f0 [ 89.465400] [e959de80] [c05479a8] sock_ioctl+0x168/0x460 [ 89.470708] [e959ded0] [c01cf3ec] do_vfs_ioctl+0xac/0x8c0 [ 89.476099] [e959df20] [c01cfc40] SyS_ioctl+0x40/0xc0 [ 89.481147] [e959df40] [c0011318] ret_from_syscall+0x0/0x3c [ 89.486715] --- interrupt: c01 at 0x1006943c [ 89.486715] LR = 0x100c45ec Signed-off-by: Denis Kirjanov Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index ea43b4974149..7af31ddd093f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1100,7 +1100,7 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr) set_bucket(dtsec->regs, bucket, true); /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; -- cgit v1.2.3 From 35d889d10b649fda66121891ec05eca88150059d Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Mon, 5 Mar 2018 20:52:54 +0300 Subject: sch_netem: fix skb leak in netem_enqueue() When we exceed current packets limit and we have more than one segment in the list returned by skb_gso_segment(), netem drops only the first one, skipping the rest, hence kmemleak reports: unreferenced object 0xffff880b5d23b600 (size 1024): comm "softirq", pid 0, jiffies 4384527763 (age 2770.629s) hex dump (first 32 bytes): 00 80 23 5d 0b 88 ff ff 00 00 00 00 00 00 00 00 ..#]............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d8a19b9d>] __alloc_skb+0xc9/0x520 [<000000001709b32f>] skb_segment+0x8c8/0x3710 [<00000000c7b9bb88>] tcp_gso_segment+0x331/0x1830 [<00000000c921cba1>] inet_gso_segment+0x476/0x1370 [<000000008b762dd4>] skb_mac_gso_segment+0x1f9/0x510 [<000000002182660a>] __skb_gso_segment+0x1dd/0x620 [<00000000412651b9>] netem_enqueue+0x1536/0x2590 [sch_netem] [<0000000005d3b2a9>] __dev_queue_xmit+0x1167/0x2120 [<00000000fc5f7327>] ip_finish_output2+0x998/0xf00 [<00000000d309e9d3>] ip_output+0x1aa/0x2c0 [<000000007ecbd3a4>] tcp_transmit_skb+0x18db/0x3670 [<0000000042d2a45f>] tcp_write_xmit+0x4d4/0x58c0 [<0000000056a44199>] tcp_tasklet_func+0x3d9/0x540 [<0000000013d06d02>] tasklet_action+0x1ca/0x250 [<00000000fcde0b8b>] __do_softirq+0x1b4/0x5a3 [<00000000e7ed027c>] irq_exit+0x1e2/0x210 Fix it by adding the rest of the segments, if any, to skb 'to_free' list. Add new __qdisc_drop_all() and qdisc_drop_all() functions because they can be useful in the future if we need to drop segmented GSO packets in other places. Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") Signed-off-by: Alexey Kodanev Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sch_generic.h | 19 +++++++++++++++++++ net/sched/sch_netem.c | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e2ab13687fb9..2092d33194dd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -824,6 +824,16 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) *to_free = skb; } +static inline void __qdisc_drop_all(struct sk_buff *skb, + struct sk_buff **to_free) +{ + if (skb->prev) + skb->prev->next = *to_free; + else + skb->next = *to_free; + *to_free = skb; +} + static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, struct qdisc_skb_head *qh, struct sk_buff **to_free) @@ -956,6 +966,15 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_DROP; } +static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + __qdisc_drop_all(skb, to_free); + qdisc_qstats_drop(sch); + + return NET_XMIT_DROP; +} + /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how long it will take to send a packet given its size. */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 7c179addebcd..7d6801fc5340 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_all(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); -- cgit v1.2.3 From 2695578b896aea472b2c0dcbe9d92daa71738484 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2018 11:41:13 -0800 Subject: net: usbnet: fix potential deadlock on 32bit hosts Marek reported a LOCKDEP issue occurring on 32bit host, that we tracked down to the fact that usbnet could either run from soft or hard irqs. This patch adds u64_stats_update_begin_irqsave() and u64_stats_update_end_irqrestore() helpers to solve this case. [ 17.768040] ================================ [ 17.772239] WARNING: inconsistent lock state [ 17.776511] 4.16.0-rc3-next-20180227-00007-g876c53a7493c #453 Not tainted [ 17.783329] -------------------------------- [ 17.787580] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 17.793607] swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 17.798751] (&syncp->seq#5){?.-.}, at: [<9b22e5f0>] asix_rx_fixup_internal+0x188/0x288 [ 17.806790] {IN-HARDIRQ-W} state was registered at: [ 17.811677] tx_complete+0x100/0x208 [ 17.815319] __usb_hcd_giveback_urb+0x60/0xf0 [ 17.819770] xhci_giveback_urb_in_irq+0xa8/0x240 [ 17.824469] xhci_td_cleanup+0xf4/0x16c [ 17.828367] xhci_irq+0xe74/0x2240 [ 17.831827] usb_hcd_irq+0x24/0x38 [ 17.835343] __handle_irq_event_percpu+0x98/0x510 [ 17.840111] handle_irq_event_percpu+0x1c/0x58 [ 17.844623] handle_irq_event+0x38/0x5c [ 17.848519] handle_fasteoi_irq+0xa4/0x138 [ 17.852681] generic_handle_irq+0x18/0x28 [ 17.856760] __handle_domain_irq+0x6c/0xe4 [ 17.860941] gic_handle_irq+0x54/0xa0 [ 17.864666] __irq_svc+0x70/0xb0 [ 17.867964] arch_cpu_idle+0x20/0x3c [ 17.871578] arch_cpu_idle+0x20/0x3c [ 17.875190] do_idle+0x144/0x218 [ 17.878468] cpu_startup_entry+0x18/0x1c [ 17.882454] start_kernel+0x394/0x400 [ 17.886177] irq event stamp: 161912 [ 17.889616] hardirqs last enabled at (161912): [<7bedfacf>] __netdev_alloc_skb+0xcc/0x140 [ 17.897893] hardirqs last disabled at (161911): [] __netdev_alloc_skb+0x94/0x140 [ 17.904903] exynos5-hsi2c 12ca0000.i2c: tx timeout [ 17.906116] softirqs last enabled at (161904): [<387102ff>] irq_enter+0x78/0x80 [ 17.906123] softirqs last disabled at (161905): [] irq_exit+0x134/0x158 [ 17.925722]. [ 17.925722] other info that might help us debug this: [ 17.933435] Possible unsafe locking scenario: [ 17.933435]. [ 17.940331] CPU0 [ 17.942488] ---- [ 17.944894] lock(&syncp->seq#5); [ 17.948274] [ 17.950847] lock(&syncp->seq#5); [ 17.954386]. [ 17.954386] *** DEADLOCK *** [ 17.954386]. [ 17.962422] no locks held by swapper/0/0. Fixes: c8b5d129ee29 ("net: usbnet: support 64bit stats") Signed-off-by: Eric Dumazet Reported-by: Marek Szyprowski Cc: Greg Ungerer Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 10 ++++++---- include/linux/u64_stats_sync.h | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 8a22ff67b026..d9eea8cfe6cb 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -315,6 +315,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev) void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + unsigned long flags; int status; if (test_bit(EVENT_RX_PAUSED, &dev->flags)) { @@ -326,10 +327,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) if (skb->protocol == 0) skb->protocol = eth_type_trans (skb, dev->net); - u64_stats_update_begin(&stats64->syncp); + flags = u64_stats_update_begin_irqsave(&stats64->syncp); stats64->rx_packets++; stats64->rx_bytes += skb->len; - u64_stats_update_end(&stats64->syncp); + u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", skb->len + sizeof (struct ethhdr), skb->protocol); @@ -1248,11 +1249,12 @@ static void tx_complete (struct urb *urb) if (urb->status == 0) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + unsigned long flags; - u64_stats_update_begin(&stats64->syncp); + flags = u64_stats_update_begin_irqsave(&stats64->syncp); stats64->tx_packets += entry->packets; stats64->tx_bytes += entry->length; - u64_stats_update_end(&stats64->syncp); + u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 5bdbd9f49395..07ee0f84a46c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -90,6 +90,28 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp) #endif } +static inline unsigned long +u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = 0; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + local_irq_save(flags); + write_seqcount_begin(&syncp->seq); +#endif + return flags; +} + +static inline void +u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_end(&syncp->seq); + local_irq_restore(flags); +#endif +} + static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit v1.2.3 From 9de506a547c0d172d13a91d69b1a399e6a2c0efa Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 5 Mar 2018 23:50:46 +0200 Subject: qed: Free RoCE ILT Memory on rmmod qedr Rdma requires ILT Memory to be allocated for it's QPs. Each ILT entry points to a page used by several Rdma QPs. To avoid allocating all the memory in advance, the rdma implementation dynamically allocates memory as more QPs are added, however it does not dynamically free the memory. The memory should have been freed on rmmod qedr, but isn't. This patch adds the memory freeing on rmmod qedr (currently it will be freed with qed is removed). An outcome of this bug, is that if qedr is unloaded and loaded without unloaded qed, there will be no more RoCE traffic. The reason these are related, is that the logic of detecting the first QP ever opened is by asking whether ILT memory for RoCE has been allocated. In addition, this patch modifies freeing of the Task context to always use the PROTOCOLID_ROCE and not the protocol passed, this is because task context for iWARP and ROCE both use the ROCE protocol id, as opposed to the connection context. Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 5 ++++- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 6f546e869d8d..00f41c145d4d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2480,7 +2480,10 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto) if (rc) return rc; - /* Free Task CXT */ + /* Free Task CXT ( Intentionally RoCE as task-id is shared between + * RoCE and iWARP ) + */ + proto = PROTOCOLID_ROCE; rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_TASK, 0, qed_cxt_get_proto_tid_count(p_hwfn, proto)); if (rc) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 5d040b873137..f3ee6538b553 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -380,6 +380,7 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn) qed_rdma_free_reserved_lkey(p_hwfn); qed_rdma_resc_free(p_hwfn); + qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto); } static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) -- cgit v1.2.3 From cc5db3150e87fe7f7e947bf333b6c1c97f848ecb Mon Sep 17 00:00:00 2001 From: Hemanth Puranik Date: Tue, 6 Mar 2018 08:18:06 +0530 Subject: net: qcom/emac: Use proper free methods during TX This patch fixes the warning messages/call traces seen if DMA debug is enabled, In case of fragmented skb's memory was allocated using dma_map_page but freed using dma_unmap_single. This patch modifies buffer allocations in TX path to use dma_map_page in all the places and dma_unmap_page while freeing the buffers. Signed-off-by: Hemanth Puranik Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 9cbb27263742..d5a32b7c7dc5 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1194,9 +1194,9 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) while (tx_q->tpd.consume_idx != hw_consume_idx) { tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx); if (tpbuf->dma_addr) { - dma_unmap_single(adpt->netdev->dev.parent, - tpbuf->dma_addr, tpbuf->length, - DMA_TO_DEVICE); + dma_unmap_page(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); tpbuf->dma_addr = 0; } @@ -1353,9 +1353,11 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt, tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); tpbuf->length = mapped_len; - tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, - skb->data, tpbuf->length, - DMA_TO_DEVICE); + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + virt_to_page(skb->data), + offset_in_page(skb->data), + tpbuf->length, + DMA_TO_DEVICE); ret = dma_mapping_error(adpt->netdev->dev.parent, tpbuf->dma_addr); if (ret) @@ -1371,9 +1373,12 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt, if (mapped_len < len) { tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); tpbuf->length = len - mapped_len; - tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, - skb->data + mapped_len, - tpbuf->length, DMA_TO_DEVICE); + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + virt_to_page(skb->data + + mapped_len), + offset_in_page(skb->data + + mapped_len), + tpbuf->length, DMA_TO_DEVICE); ret = dma_mapping_error(adpt->netdev->dev.parent, tpbuf->dma_addr); if (ret) -- cgit v1.2.3 From e9fa1495d738e34fcec88a3d2ec9101a9ee5b310 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 6 Mar 2018 11:10:19 +0100 Subject: ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes Currently, administrative MTU changes on a given netdevice are not reflected on route exceptions for MTU-less routes, with a set PMTU value, for that device: # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a proto kernel src 2001:db8::a metric 256 pref medium # ping6 -c 1 -q -s10000 2001:db8::b > /dev/null # ip netns exec a ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium # ip link set dev vti_a mtu 3000 # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium # ip link set dev vti_a mtu 9000 # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium The first issue is that since commit fb56be83e43d ("net-ipv6: on device mtu change do not add mtu to mtu-less routes") we don't call rt6_exceptions_update_pmtu() from rt6_mtu_change_route(), which handles administrative MTU changes, if the regular route is MTU-less. However, PMTU exceptions should be always updated, as long as RTAX_MTU is not locked. Keep the check for MTU-less main route, as introduced by that commit, but, for exceptions, call rt6_exceptions_update_pmtu() regardless of that check. Once that is fixed, one problem remains: MTU changes are not reflected if the new MTU is higher than the previous one, because rt6_exceptions_update_pmtu() doesn't allow that. We should instead allow PMTU increase if the old PMTU matches the local MTU, as that implies that the old MTU was the lowest in the path, and PMTU discovery might lead to different results. The existing check in rt6_mtu_change_route() correctly took that case into account (for regular routes only), so factor it out and re-use it also in rt6_exceptions_update_pmtu(). While at it, fix comments style and grammar, and try to be a bit more descriptive. Reported-by: Xiumei Mu Fixes: fb56be83e43d ("net-ipv6: on device mtu change do not add mtu to mtu-less routes") Fixes: f5bbe7ee79c2 ("ipv6: prepare rt6_mtu_change() for exception table") Signed-off-by: Stefano Brivio Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9dcfadddd800..0db4218c9186 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1509,7 +1509,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) } } -static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) +static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) +{ + /* If the new MTU is lower than the route PMTU, this new MTU will be the + * lowest MTU in the path: always allow updating the route PMTU to + * reflect PMTU decreases. + * + * If the new MTU is higher, and the route PMTU is equal to the local + * MTU, this means the old MTU is the lowest in the path, so allow + * updating it: if other nodes now have lower MTUs, PMTU discovery will + * handle this. + */ + + if (dst_mtu(&rt->dst) >= mtu) + return true; + + if (dst_mtu(&rt->dst) == idev->cnf.mtu6) + return true; + + return false; +} + +static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1518,20 +1541,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); - if (bucket) { - for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { - hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { - struct rt6_info *entry = rt6_ex->rt6i; - /* For RTF_CACHE with rt6i_pmtu == 0 - * (i.e. a redirected route), - * the metrics of its rt->dst.from has already - * been updated. - */ - if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) - entry->rt6i_pmtu = mtu; - } - bucket++; + if (!bucket) + return; + + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + + /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected + * route), the metrics of its rt->dst.from have already + * been updated. + */ + if (entry->rt6i_pmtu && + rt6_mtu_change_route_allowed(idev, entry, mtu)) + entry->rt6i_pmtu = mtu; } + bucket++; } } @@ -3809,25 +3834,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - /* - If new MTU is less than route PMTU, this new MTU will be the - lowest MTU in the path, update the route PMTU to reflect PMTU - decreases; if new MTU is greater than route PMTU, and the - old MTU is the lowest MTU in the path, update the route PMTU - to reflect the increase. In this case if the other nodes' MTU - also have the lowest MTU, TOO BIG MESSAGE will be lead to - PMTU discovery. - */ if (rt->dst.dev == arg->dev && - dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { spin_lock_bh(&rt6_exception_lock); - if (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + if (dst_metric_raw(&rt->dst, RTAX_MTU) && + rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); - } - rt6_exceptions_update_pmtu(rt, arg->mtu); + rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } return 0; -- cgit v1.2.3 From e06513d78d54e6c7026c9043a39e2c01ee25bdbe Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 6 Mar 2018 09:00:06 -0600 Subject: net: smsc911x: Fix unload crash when link is up The smsc911x driver will crash if it is rmmod'ed while the netdev is up like: Call trace: phy_detach+0x94/0x150 phy_disconnect+0x40/0x50 smsc911x_stop+0x104/0x128 [smsc911x] __dev_close_many+0xb4/0x138 dev_close_many+0xbc/0x190 rollback_registered_many+0x140/0x460 rollback_registered+0x68/0xb0 unregister_netdevice_queue+0x100/0x118 unregister_netdev+0x28/0x38 smsc911x_drv_remove+0x58/0x130 [smsc911x] platform_drv_remove+0x30/0x50 device_release_driver_internal+0x15c/0x1f8 driver_detach+0x54/0x98 bus_remove_driver+0x64/0xe8 driver_unregister+0x34/0x60 platform_driver_unregister+0x20/0x30 smsc911x_cleanup_module+0x14/0xbca8 [smsc911x] SyS_delete_module+0x1e8/0x238 __sys_trace_return+0x0/0x4 This is caused by the mdiobus being unregistered/free'd and the code in phy_detach() attempting to manipulate mdio related structures from unregister_netdev() calling close() To fix this, we delay the mdiobus teardown until after the netdev is deregistered. Reported-by: Matt Sealey Signed-off-by: Jeremy Linton Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 012fb66eed8d..f0afb88d7bc2 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2335,14 +2335,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev) pdata = netdev_priv(dev); BUG_ON(!pdata); BUG_ON(!pdata->ioaddr); - WARN_ON(dev->phydev); SMSC_TRACE(pdata, ifdown, "Stopping driver"); + unregister_netdev(dev); + mdiobus_unregister(pdata->mii_bus); mdiobus_free(pdata->mii_bus); - unregister_netdev(dev); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smsc911x-memory"); if (!res) -- cgit v1.2.3 From a560002437d3646dafccecb1bf32d1685112ddda Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 6 Mar 2018 18:46:39 +0300 Subject: net: Fix hlist corruptions in inet_evict_bucket() inet_evict_bucket() iterates global list, and several tasks may call it in parallel. All of them hash the same fq->list_evictor to different lists, which leads to list corruption. This patch makes fq be hashed to expired list only if this has not been made yet by another task. Since inet_frag_alloc() allocates fq using kmem_cache_zalloc(), we may rely on list_evictor is initially unhashed. The problem seems to exist before async pernet_operations, as there was possible to have exit method to be executed in parallel with inet_frags::frags_work, so I add two Fixes tags. This also may go to stable. Fixes: d1fe19444d82 "inet: frag: don't re-use chainlist for evictor" Fixes: f84c6821aa54 "net: Convert pernet_subsys, registered from inet_init()" Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 26a3d0315728..e8ec28999f5c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -119,6 +119,9 @@ out: static bool inet_fragq_should_evict(const struct inet_frag_queue *q) { + if (!hlist_unhashed(&q->list_evictor)) + return false; + return q->net->low_thresh == 0 || frag_mem_limit(q->net) >= q->net->low_thresh; } -- cgit v1.2.3 From 17cfe79a65f98abe535261856c5aef14f306dff7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Mar 2018 07:54:53 -0800 Subject: l2tp: do not accept arbitrary sockets syzkaller found an issue caused by lack of sufficient checks in l2tp_tunnel_create() RAW sockets can not be considered as UDP ones for instance. In another patch, we shall replace all pr_err() by less intrusive pr_debug() so that syzkaller can find other bugs faster. Acked-by: Guillaume Nault Acked-by: James Chapman ================================================================== BUG: KASAN: slab-out-of-bounds in setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69 dst_release: dst:00000000d53d0d0f refcnt:-1 Write of size 1 at addr ffff8801d013b798 by task syz-executor3/6242 CPU: 1 PID: 6242 Comm: syz-executor3 Not tainted 4.16.0-rc2+ #253 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23b/0x360 mm/kasan/report.c:412 __asan_report_store1_noabort+0x17/0x20 mm/kasan/report.c:435 setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69 l2tp_tunnel_create+0x1354/0x17f0 net/l2tp/l2tp_core.c:1596 pppol2tp_connect+0x14b1/0x1dd0 net/l2tp/l2tp_ppp.c:707 SYSC_connect+0x213/0x4a0 net/socket.c:1640 SyS_connect+0x24/0x30 net/socket.c:1621 do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 83421c6f0bef..e22512e32827 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1457,9 +1457,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 encap = cfg->encap; /* Quick sanity checks */ + err = -EPROTONOSUPPORT; + if (sk->sk_type != SOCK_DGRAM) { + pr_debug("tunl %hu: fd %d wrong socket type\n", + tunnel_id, fd); + goto err; + } switch (encap) { case L2TP_ENCAPTYPE_UDP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_UDP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); @@ -1467,7 +1472,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } break; case L2TP_ENCAPTYPE_IP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_L2TP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); -- cgit v1.2.3 From 67f93df79aeefc3add4e4b31a752600f834236e2 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 6 Mar 2018 22:57:01 +0300 Subject: dccp: check sk for closed state in dccp_sendmsg() dccp_disconnect() sets 'dp->dccps_hc_tx_ccid' tx handler to NULL, therefore if DCCP socket is disconnected and dccp_sendmsg() is called after it, it will cause a NULL pointer dereference in dccp_write_xmit(). This crash and the reproducer was reported by syzbot. Looks like it is reproduced if commit 69c64866ce07 ("dccp: CVE-2017-8824: use-after-free in DCCP code") is applied. Reported-by: syzbot+f99ab3887ab65d70f816@syzkaller.appspotmail.com Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 15bdc002d90c..84cd4e3fd01b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (skb == NULL) goto out_release; + if (sk->sk_state == DCCP_CLOSED) { + rc = -ENOTCONN; + goto out_discard; + } + skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) -- cgit v1.2.3 From e05836ac07c77dd90377f8c8140bce2a44af5fe7 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Tue, 6 Mar 2018 17:15:12 -0500 Subject: tcp: purge write queue upon aborting the connection When the connection is aborted, there is no point in keeping the packets on the write queue until the connection is closed. Similar to a27fd7a8ed38 ('tcp: purge write queue upon RST'), this is essential for a correct MSG_ZEROCOPY implementation, because userspace cannot call close(fd) before receiving zerocopy signals even when the connection is aborted. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_timer.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48636aee23c3..8b8059b7af4d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3566,6 +3566,7 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); + tcp_write_queue_purge(sk); release_sock(sk); return 0; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 71fc60f1b326..f7d944855f8e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; sk->sk_error_report(sk); + tcp_write_queue_purge(sk); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } -- cgit v1.2.3 From b51f26b14683838825170387457176c1ffaea9f5 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 6 Mar 2018 17:27:44 -0500 Subject: net: don't unnecessarily load kernel modules in dev_ioctl() Starting with v4.16-rc1 we've been seeing a higher than usual number of requests for the kernel to load networking modules, even on events which shouldn't trigger a module load (e.g. ioctl(TCGETS)). Stephen Smalley suggested the problem may lie in commit 44c02a2c3dc5 ("dev_ioctl(): move copyin/copyout to callers") which moves changes the network dev_ioctl() function to always call dev_load(), regardless of the requested ioctl. This patch moves the dev_load() calls back into the individual ioctls while preserving the rest of the original patch. Reported-by: Dominick Grift Suggested-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 0ab1af04296c..a04e1e88bf3a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c if (colon) *colon = 0; - dev_load(net, ifr->ifr_name); - /* * See which interface the caller is talking about. */ @@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + dev_load(net, ifr->ifr_name); rcu_read_lock(); ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); @@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c return ret; case SIOCETHTOOL: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ethtool(net, ifr); rtnl_unlock(); @@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: + dev_load(net, ifr->ifr_name); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); @@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); @@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); -- cgit v1.2.3 From 016764de8b0d17e946832d7b6530434daa82df0e Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 7 Mar 2018 13:08:45 +0530 Subject: cxgb4: copy adap index to PF0-3 adapter instances instantiation of VF's on different adapters fails, copy adapter index and chip type to PF0-3 adapter instances to fix the issue. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 7b452e85de2a..33bc84185b82 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5181,6 +5181,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->name = pci_name(pdev); adapter->mbox = func; adapter->pf = func; + adapter->params.chip = chip; + adapter->adap_idx = adap_idx; adapter->msg_enable = DFLT_MSG_ENABLE; adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + (sizeof(struct mbox_cmd) * -- cgit v1.2.3 From b06ef18a4c255609388ed6e068a1c69c797545e0 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 7 Mar 2018 13:10:24 +0530 Subject: cxgb4: do not set needs_free_netdev for mgmt dev's Do not set 'needs_free_netdev' as we do call free_netdev for mgmt net devices, doing both hits BUG_ON. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 33bc84185b82..61022b5f6743 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4970,7 +4970,6 @@ static void cxgb4_mgmt_setup(struct net_device *dev) /* Initialize the device structure. */ dev->netdev_ops = &cxgb4_mgmt_netdev_ops; dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; - dev->needs_free_netdev = true; } static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) -- cgit v1.2.3 From 3ffb0ba9b567a8efb9a04ed3d1ec15ff333ada22 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 5 Mar 2018 16:56:13 -0700 Subject: libnvdimm, {btt, blk}: do integrity setup before add_disk() Prior to 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") we needed to temporarily add a zero-capacity disk before registering for blk-integrity. But adding a zero-capacity disk caused the partition table scanning to bail early, and this resulted in partitions not coming up after a probe of the BTT or blk namespaces. We can now register for integrity before the disk has been added, and this fixes the rescan problems. Fixes: 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") Reported-by: Dariusz Dokupil Cc: Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 3 +-- drivers/nvdimm/btt.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 345acca576b3..1bd7b3734751 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -278,8 +278,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); - set_capacity(disk, 0); - device_add_disk(dev, disk); if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) return -ENOMEM; @@ -292,6 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + device_add_disk(dev, disk); revalidate_disk(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 2ef544f10ec8..4b95ac513de2 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1545,8 +1545,6 @@ static int btt_blk_init(struct btt *btt) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; - set_capacity(btt->btt_disk, 0); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); if (btt_meta_size(btt)) { int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); @@ -1558,6 +1556,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); -- cgit v1.2.3 From 6007b080d2e2adb7af22bf29165f0594ea12b34c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 7 Mar 2018 22:10:01 +0100 Subject: bpf, x64: increase number of passes In Cilium some of the main programs we run today are hitting 9 passes on x64's JIT compiler, and we've had cases already where we surpassed the limit where the JIT then punts the program to the interpreter instead, leading to insertion failures due to CONFIG_BPF_JIT_ALWAYS_ON or insertion failures due to the prog array owner being JITed but the program to insert not (both must have the same JITed/non-JITed property). One concrete case the program image shrunk from 12,767 bytes down to 10,288 bytes where the image converged after 16 steps. I've measured that this took 340us in the JIT until it converges on my i7-6600U. Thus, increase the original limit we had from day one where the JIT covered cBPF only back then before we run into the case (as similar with the complexity limit) where we trip over this and hit program rejections. Also add a cond_resched() into the compilation loop, the JIT process runs without any locks and may sleep anyway. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 45e4eb5bcbb2..ce5b2ebd5701 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1188,7 +1188,7 @@ skip_init_addrs: * may converge on the last pass. In such case do one more * pass to emit the final image */ - for (pass = 0; pass < 10 || image; pass++) { + for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; @@ -1215,6 +1215,7 @@ skip_init_addrs: } } oldproglen = proglen; + cond_resched(); } if (bpf_jit_enable > 1) -- cgit v1.2.3 From 970656b3246d30955894951ed16bd658e42d8c24 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 1 Mar 2018 14:52:08 +0200 Subject: ARM: dts: imx7d-sdb: Fix regulator-usb-otg2-vbus node name The two usb-otg regulators for imx7d-sdb are both called "regulator-usb-otg1-vbus" and they effectively override each other. This is most likely a copy-paste error. Fixes: b877039aa1fe ("ARM: dts: imx7d-sdb: Adjust the regulator nodes") Signed-off-by: Leonard Crestez Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-sdb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index a7a5dc7b2700..e7d2db839d70 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -82,7 +82,7 @@ enable-active-high; }; - reg_usb_otg2_vbus: regulator-usb-otg1-vbus { + reg_usb_otg2_vbus: regulator-usb-otg2-vbus { compatible = "regulator-fixed"; regulator-name = "usb_otg2_vbus"; regulator-min-microvolt = <5000000>; -- cgit v1.2.3 From 49159a9dc3da83f17be00acbc7b2ab84ffec1aa7 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 23 Feb 2018 14:28:21 +0200 Subject: clk: ti: clkctrl: add support for CLK_SET_RATE_PARENT flag Certain clkctrl clocks, notably the display ones, use the CLK_SET_RATE_PARENT feature extensively. Add support for this flag to the clkctrl clocks. Signed-off-by: Tero Kristo Reported-by: Jyri Sarha Acked-by: Tony Lindgren Tested-by: Jyri Sarha --- drivers/clk/ti/clkctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index afa0d6bfc5c1..421b05392220 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -537,6 +537,8 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node) init.parent_names = ®_data->parent; init.num_parents = 1; init.flags = 0; + if (reg_data->flags & CLKF_SET_RATE_PARENT) + init.flags |= CLK_SET_RATE_PARENT; init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", node->parent->name, node->name, reg_data->offset, 0); -- cgit v1.2.3 From c083dc5f3738d394223baa0f90705397b0844acd Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 23 Feb 2018 14:29:19 +0200 Subject: clk: ti: am33xx: add set-rate-parent support for display clkctrl clock Display driver assumes it can use clk_set_rate for the display clock via set-rate-parent mechanism, so add the flag for this to it. Signed-off-by: Tero Kristo Reported-by: Jyri Sarha Acked-by: Tony Lindgren Tested-by: Jyri Sarha --- drivers/clk/ti/clk-33xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/ti/clk-33xx.c b/drivers/clk/ti/clk-33xx.c index 612491a26070..12e0a2d19911 100644 --- a/drivers/clk/ti/clk-33xx.c +++ b/drivers/clk/ti/clk-33xx.c @@ -45,7 +45,7 @@ static const struct omap_clkctrl_bit_data am3_gpio4_bit_data[] __initconst = { static const struct omap_clkctrl_reg_data am3_l4_per_clkctrl_regs[] __initconst = { { AM3_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" }, - { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP, "lcd_gclk", "lcdc_clkdm" }, + { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "lcd_gclk", "lcdc_clkdm" }, { AM3_USB_OTG_HS_CLKCTRL, NULL, CLKF_SW_SUP, "usbotg_fck", "l3s_clkdm" }, { AM3_TPTC0_CLKCTRL, NULL, CLKF_SW_SUP, "l3_gclk", "l3_clkdm" }, { AM3_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_div2_ck", "l3_clkdm" }, -- cgit v1.2.3 From 762790b75210f5219c6896565f811271405a9632 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 26 Feb 2018 14:40:37 +0200 Subject: clk: ti: am43xx: add set-rate-parent support for display clkctrl clock Display driver assumes it can use clk_set_rate for the display clock via set-rate-parent mechanism, so add the flag for this to id. Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- drivers/clk/ti/clk-43xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c index 2b7c2e017665..63c5ddb50187 100644 --- a/drivers/clk/ti/clk-43xx.c +++ b/drivers/clk/ti/clk-43xx.c @@ -187,7 +187,7 @@ static const struct omap_clkctrl_reg_data am4_l4_per_clkctrl_regs[] __initconst { AM4_OCP2SCP0_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" }, { AM4_OCP2SCP1_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" }, { AM4_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_ck", "emif_clkdm" }, - { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP, "disp_clk", "dss_clkdm" }, + { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "disp_clk", "dss_clkdm" }, { AM4_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" }, { 0 }, }; -- cgit v1.2.3 From 625504aeff11eeb95f78ad9dde22d911c6839c71 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Thu, 22 Feb 2018 00:32:50 +0100 Subject: pinctrl: sh-pfc: r8a7795: remove duplicate of CLKOUT pin in pinmux_pins[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding GP-1-28 port pin support it was forgotten to remove the CLKOUT pin from the list of pins that are not associated with a GPIO port in pinmux_pins[]. This results in a warning when reading the pinctrl files in sysfs as the CLKOUT pin is still added as a none GPIO pin. Fix this by removing the duplicated entry which is no longer needed. ~ # cat /sys/kernel/debug/pinctrl/e6060000.pin-controller/pinconf-pins [ 89.432081] ------------[ cut here ]------------ [ 89.436904] Pin 496 is not in bias info list [ 89.441252] WARNING: CPU: 1 PID: 456 at drivers/pinctrl/sh-pfc/core.c:408 sh_pfc_pin_to_bias_reg+0xb0/0xb8 [ 89.451002] CPU: 1 PID: 456 Comm: cat Not tainted 4.16.0-rc1-arm64-renesas-00048-gdfafc344a4f24dde #12 [ 89.460394] Hardware name: Renesas Salvator-X 2nd version board based on r8a7795 ES2.0+ (DT) [ 89.468910] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 89.473747] pc : sh_pfc_pin_to_bias_reg+0xb0/0xb8 [ 89.478495] lr : sh_pfc_pin_to_bias_reg+0xb0/0xb8 [ 89.483241] sp : ffff00000aff3ab0 [ 89.486587] x29: ffff00000aff3ab0 x28: ffff00000893c698 [ 89.491955] x27: ffff000008ad7d98 x26: 0000000000000000 [ 89.497323] x25: ffff8006fb3f5028 x24: ffff8006fb3f5018 [ 89.502690] x23: 0000000000000001 x22: 00000000000001f0 [ 89.508057] x21: ffff8006fb3f5018 x20: ffff000008bef000 [ 89.513423] x19: 0000000000000000 x18: ffffffffffffffff [ 89.518790] x17: 0000000000006c4a x16: ffff000008d67c98 [ 89.524157] x15: 0000000000000001 x14: ffff00000896ca98 [ 89.529524] x13: 00000000cce5f611 x12: ffff8006f8d3b5a8 [ 89.534891] x11: ffff00000981e000 x10: ffff000008befa08 [ 89.540258] x9 : ffff8006f9b987a0 x8 : ffff000008befa08 [ 89.545625] x7 : ffff000008137094 x6 : 0000000000000000 [ 89.550991] x5 : 0000000000000000 x4 : 0000000000000001 [ 89.556357] x3 : 0000000000000007 x2 : 0000000000000007 [ 89.561723] x1 : 1ff24f80f1818600 x0 : 0000000000000000 [ 89.567091] Call trace: [ 89.569561] sh_pfc_pin_to_bias_reg+0xb0/0xb8 [ 89.573960] r8a7795_pinmux_get_bias+0x30/0xc0 [ 89.578445] sh_pfc_pinconf_get+0x1e0/0x2d8 [ 89.582669] pin_config_get_for_pin+0x20/0x30 [ 89.587067] pinconf_generic_dump_one+0x180/0x1c8 [ 89.591815] pinconf_generic_dump_pins+0x84/0xd8 [ 89.596476] pinconf_pins_show+0xc8/0x130 [ 89.600528] seq_read+0xe4/0x510 [ 89.603789] full_proxy_read+0x60/0x90 [ 89.607576] __vfs_read+0x30/0x140 [ 89.611010] vfs_read+0x90/0x170 [ 89.614269] SyS_read+0x60/0xd8 [ 89.617443] __sys_trace_return+0x0/0x4 [ 89.621314] ---[ end trace 99c8d0d39c13e794 ]--- Fixes: 82d2de5a4f646f72 ("pinctrl: sh-pfc: r8a7795: Add GP-1-28 port pin support") Reviewed-and-tested-by: Geert Uytterhoeven Signed-off-by: Niklas Söderlund Signed-off-by: Linus Walleij --- drivers/pinctrl/sh-pfc/pfc-r8a7795.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c index 18aeee592fdc..35951e7b89d2 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c @@ -1538,7 +1538,6 @@ static const struct sh_pfc_pin pinmux_pins[] = { SH_PFC_PIN_NAMED_CFG('B', 18, AVB_TD1, CFG_FLAGS), SH_PFC_PIN_NAMED_CFG('B', 19, AVB_RXC, CFG_FLAGS), SH_PFC_PIN_NAMED_CFG('C', 1, PRESETOUT#, CFG_FLAGS), - SH_PFC_PIN_NAMED_CFG('F', 1, CLKOUT, CFG_FLAGS), SH_PFC_PIN_NAMED_CFG('H', 37, MLB_REF, CFG_FLAGS), SH_PFC_PIN_NAMED_CFG('V', 3, QSPI1_SPCLK, CFG_FLAGS), SH_PFC_PIN_NAMED_CFG('V', 5, QSPI1_SSL, CFG_FLAGS), -- cgit v1.2.3 From 93b0beae721b3344923b4b8317e9d83b542f4ca6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Feb 2018 19:17:51 +0100 Subject: pinctrl: samsung: Validate alias coming from DT Driver uses alias from Device Tree as an index of pin controller data array. In case of a wrong DTB or an out-of-tree DTB, the alias could be outside of this data array leading to out-of-bounds access. Depending on binary and memory layout, this could be handled properly (showing error like "samsung-pinctrl 3860000.pinctrl: driver data not available") or could lead to exceptions. Reported-by: Geert Uytterhoeven Cc: Fixes: 30574f0db1b1 ("pinctrl: add samsung pinctrl and gpiolib driver") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Acked-by: Tomasz Figa Signed-off-by: Linus Walleij --- drivers/pinctrl/samsung/pinctrl-exynos-arm.c | 56 +++++++++++++++++++---- drivers/pinctrl/samsung/pinctrl-exynos-arm64.c | 14 +++++- drivers/pinctrl/samsung/pinctrl-s3c24xx.c | 28 ++++++++++-- drivers/pinctrl/samsung/pinctrl-s3c64xx.c | 7 ++- drivers/pinctrl/samsung/pinctrl-samsung.c | 61 ++++++++++++++++---------- drivers/pinctrl/samsung/pinctrl-samsung.h | 40 ++++++++++------- 6 files changed, 154 insertions(+), 52 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm.c index c32399faff57..90c274490181 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm.c @@ -124,7 +124,7 @@ static const struct samsung_pin_bank_data s5pv210_pin_bank[] __initconst = { EXYNOS_PIN_BANK_EINTW(8, 0xc60, "gph3", 0x0c), }; -const struct samsung_pin_ctrl s5pv210_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s5pv210_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = s5pv210_pin_bank, @@ -137,6 +137,11 @@ const struct samsung_pin_ctrl s5pv210_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data s5pv210_of_data __initconst = { + .ctrl = s5pv210_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s5pv210_pin_ctrl), +}; + /* Pad retention control code for accessing PMU regmap */ static atomic_t exynos_shared_retention_refcnt; @@ -199,7 +204,7 @@ static const struct samsung_retention_data exynos3250_retention_data __initconst * Samsung pinctrl driver data for Exynos3250 SoC. Exynos3250 SoC includes * two gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos3250_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos3250_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos3250_pin_banks0, @@ -220,6 +225,11 @@ const struct samsung_pin_ctrl exynos3250_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos3250_of_data __initconst = { + .ctrl = exynos3250_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos3250_pin_ctrl), +}; + /* pin banks of exynos4210 pin-controller 0 */ static const struct samsung_pin_bank_data exynos4210_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00), @@ -303,7 +313,7 @@ static const struct samsung_retention_data exynos4_audio_retention_data __initco * Samsung pinctrl driver data for Exynos4210 SoC. Exynos4210 SoC includes * three gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos4210_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos4210_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos4210_pin_banks0, @@ -329,6 +339,11 @@ const struct samsung_pin_ctrl exynos4210_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos4210_of_data __initconst = { + .ctrl = exynos4210_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos4210_pin_ctrl), +}; + /* pin banks of exynos4x12 pin-controller 0 */ static const struct samsung_pin_bank_data exynos4x12_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00), @@ -391,7 +406,7 @@ static const struct samsung_pin_bank_data exynos4x12_pin_banks3[] __initconst = * Samsung pinctrl driver data for Exynos4x12 SoC. Exynos4x12 SoC includes * four gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos4x12_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos4x12_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos4x12_pin_banks0, @@ -427,6 +442,11 @@ const struct samsung_pin_ctrl exynos4x12_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos4x12_of_data __initconst = { + .ctrl = exynos4x12_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos4x12_pin_ctrl), +}; + /* pin banks of exynos5250 pin-controller 0 */ static const struct samsung_pin_bank_data exynos5250_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00), @@ -487,7 +507,7 @@ static const struct samsung_pin_bank_data exynos5250_pin_banks3[] __initconst = * Samsung pinctrl driver data for Exynos5250 SoC. Exynos5250 SoC includes * four gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos5250_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos5250_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos5250_pin_banks0, @@ -523,6 +543,11 @@ const struct samsung_pin_ctrl exynos5250_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos5250_of_data __initconst = { + .ctrl = exynos5250_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos5250_pin_ctrl), +}; + /* pin banks of exynos5260 pin-controller 0 */ static const struct samsung_pin_bank_data exynos5260_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpa0", 0x00), @@ -567,7 +592,7 @@ static const struct samsung_pin_bank_data exynos5260_pin_banks2[] __initconst = * Samsung pinctrl driver data for Exynos5260 SoC. Exynos5260 SoC includes * three gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos5260_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos5260_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos5260_pin_banks0, @@ -587,6 +612,11 @@ const struct samsung_pin_ctrl exynos5260_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos5260_of_data __initconst = { + .ctrl = exynos5260_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos5260_pin_ctrl), +}; + /* pin banks of exynos5410 pin-controller 0 */ static const struct samsung_pin_bank_data exynos5410_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00), @@ -657,7 +687,7 @@ static const struct samsung_pin_bank_data exynos5410_pin_banks3[] __initconst = * Samsung pinctrl driver data for Exynos5410 SoC. Exynos5410 SoC includes * four gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos5410_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos5410_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos5410_pin_banks0, @@ -690,6 +720,11 @@ const struct samsung_pin_ctrl exynos5410_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos5410_of_data __initconst = { + .ctrl = exynos5410_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos5410_pin_ctrl), +}; + /* pin banks of exynos5420 pin-controller 0 */ static const struct samsung_pin_bank_data exynos5420_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpy7", 0x00), @@ -774,7 +809,7 @@ static const struct samsung_retention_data exynos5420_retention_data __initconst * Samsung pinctrl driver data for Exynos5420 SoC. Exynos5420 SoC includes * four gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos5420_pin_banks0, @@ -808,3 +843,8 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { .retention_data = &exynos4_audio_retention_data, }, }; + +const struct samsung_pinctrl_of_match_data exynos5420_of_data __initconst = { + .ctrl = exynos5420_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos5420_pin_ctrl), +}; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index fc8f7833bec0..71c9d1d9f345 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -175,7 +175,7 @@ static const struct samsung_retention_data exynos5433_fsys_retention_data __init * Samsung pinctrl driver data for Exynos5433 SoC. Exynos5433 SoC includes * ten gpio/pin-mux/pinconfig controllers. */ -const struct samsung_pin_ctrl exynos5433_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos5433_pin_ctrl[] __initconst = { { /* pin-controller instance 0 data */ .pin_banks = exynos5433_pin_banks0, @@ -260,6 +260,11 @@ const struct samsung_pin_ctrl exynos5433_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data exynos5433_of_data __initconst = { + .ctrl = exynos5433_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos5433_pin_ctrl), +}; + /* pin banks of exynos7 pin-controller - ALIVE */ static const struct samsung_pin_bank_data exynos7_pin_banks0[] __initconst = { EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), @@ -339,7 +344,7 @@ static const struct samsung_pin_bank_data exynos7_pin_banks9[] __initconst = { EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), }; -const struct samsung_pin_ctrl exynos7_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl exynos7_pin_ctrl[] __initconst = { { /* pin-controller instance 0 Alive data */ .pin_banks = exynos7_pin_banks0, @@ -392,3 +397,8 @@ const struct samsung_pin_ctrl exynos7_pin_ctrl[] __initconst = { .eint_gpio_init = exynos_eint_gpio_init, }, }; + +const struct samsung_pinctrl_of_match_data exynos7_of_data __initconst = { + .ctrl = exynos7_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynos7_pin_ctrl), +}; diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 10187cb0e9b9..7e824e4d20f4 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -565,7 +565,7 @@ static const struct samsung_pin_bank_data s3c2412_pin_banks[] __initconst = { PIN_BANK_2BIT(13, 0x080, "gpj"), }; -const struct samsung_pin_ctrl s3c2412_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s3c2412_pin_ctrl[] __initconst = { { .pin_banks = s3c2412_pin_banks, .nr_banks = ARRAY_SIZE(s3c2412_pin_banks), @@ -573,6 +573,11 @@ const struct samsung_pin_ctrl s3c2412_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data s3c2412_of_data __initconst = { + .ctrl = s3c2412_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s3c2412_pin_ctrl), +}; + static const struct samsung_pin_bank_data s3c2416_pin_banks[] __initconst = { PIN_BANK_A(27, 0x000, "gpa"), PIN_BANK_2BIT(11, 0x010, "gpb"), @@ -587,7 +592,7 @@ static const struct samsung_pin_bank_data s3c2416_pin_banks[] __initconst = { PIN_BANK_2BIT(2, 0x100, "gpm"), }; -const struct samsung_pin_ctrl s3c2416_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s3c2416_pin_ctrl[] __initconst = { { .pin_banks = s3c2416_pin_banks, .nr_banks = ARRAY_SIZE(s3c2416_pin_banks), @@ -595,6 +600,11 @@ const struct samsung_pin_ctrl s3c2416_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data s3c2416_of_data __initconst = { + .ctrl = s3c2416_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s3c2416_pin_ctrl), +}; + static const struct samsung_pin_bank_data s3c2440_pin_banks[] __initconst = { PIN_BANK_A(25, 0x000, "gpa"), PIN_BANK_2BIT(11, 0x010, "gpb"), @@ -607,7 +617,7 @@ static const struct samsung_pin_bank_data s3c2440_pin_banks[] __initconst = { PIN_BANK_2BIT(13, 0x0d0, "gpj"), }; -const struct samsung_pin_ctrl s3c2440_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s3c2440_pin_ctrl[] __initconst = { { .pin_banks = s3c2440_pin_banks, .nr_banks = ARRAY_SIZE(s3c2440_pin_banks), @@ -615,6 +625,11 @@ const struct samsung_pin_ctrl s3c2440_pin_ctrl[] __initconst = { }, }; +const struct samsung_pinctrl_of_match_data s3c2440_of_data __initconst = { + .ctrl = s3c2440_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s3c2440_pin_ctrl), +}; + static const struct samsung_pin_bank_data s3c2450_pin_banks[] __initconst = { PIN_BANK_A(28, 0x000, "gpa"), PIN_BANK_2BIT(11, 0x010, "gpb"), @@ -630,10 +645,15 @@ static const struct samsung_pin_bank_data s3c2450_pin_banks[] __initconst = { PIN_BANK_2BIT(2, 0x100, "gpm"), }; -const struct samsung_pin_ctrl s3c2450_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s3c2450_pin_ctrl[] __initconst = { { .pin_banks = s3c2450_pin_banks, .nr_banks = ARRAY_SIZE(s3c2450_pin_banks), .eint_wkup_init = s3c24xx_eint_init, }, }; + +const struct samsung_pinctrl_of_match_data s3c2450_of_data __initconst = { + .ctrl = s3c2450_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s3c2450_pin_ctrl), +}; diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index 679628ac4b31..288e6567ceb1 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -789,7 +789,7 @@ static const struct samsung_pin_bank_data s3c64xx_pin_banks0[] __initconst = { * Samsung pinctrl driver data for S3C64xx SoC. S3C64xx SoC includes * one gpio/pin-mux/pinconfig controller. */ -const struct samsung_pin_ctrl s3c64xx_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl s3c64xx_pin_ctrl[] __initconst = { { /* pin-controller instance 1 data */ .pin_banks = s3c64xx_pin_banks0, @@ -798,3 +798,8 @@ const struct samsung_pin_ctrl s3c64xx_pin_ctrl[] __initconst = { .eint_wkup_init = s3c64xx_eint_eint0_init, }, }; + +const struct samsung_pinctrl_of_match_data s3c64xx_of_data __initconst = { + .ctrl = s3c64xx_pin_ctrl, + .num_ctrl = ARRAY_SIZE(s3c64xx_pin_ctrl), +}; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index da58e4554137..336e88d7bdb9 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -942,12 +942,33 @@ static int samsung_gpiolib_register(struct platform_device *pdev, return 0; } +static const struct samsung_pin_ctrl * +samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct samsung_pinctrl_of_match_data *of_data; + int id; + + id = of_alias_get_id(node, "pinctrl"); + if (id < 0) { + dev_err(&pdev->dev, "failed to get alias id\n"); + return NULL; + } + + of_data = of_device_get_match_data(&pdev->dev); + if (id >= of_data->num_ctrl) { + dev_err(&pdev->dev, "invalid alias id %d\n", id); + return NULL; + } + + return &(of_data->ctrl[id]); +} + /* retrieve the soc specific data */ static const struct samsung_pin_ctrl * samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, struct platform_device *pdev) { - int id; struct device_node *node = pdev->dev.of_node; struct device_node *np; const struct samsung_pin_bank_data *bdata; @@ -957,13 +978,9 @@ samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, void __iomem *virt_base[SAMSUNG_PINCTRL_NUM_RESOURCES]; unsigned int i; - id = of_alias_get_id(node, "pinctrl"); - if (id < 0) { - dev_err(&pdev->dev, "failed to get alias id\n"); + ctrl = samsung_pinctrl_get_soc_data_for_of_alias(pdev); + if (!ctrl) return ERR_PTR(-ENOENT); - } - ctrl = of_device_get_match_data(&pdev->dev); - ctrl += id; d->suspend = ctrl->suspend; d->resume = ctrl->resume; @@ -1188,41 +1205,41 @@ static int __maybe_unused samsung_pinctrl_resume(struct device *dev) static const struct of_device_id samsung_pinctrl_dt_match[] = { #ifdef CONFIG_PINCTRL_EXYNOS_ARM { .compatible = "samsung,exynos3250-pinctrl", - .data = exynos3250_pin_ctrl }, + .data = &exynos3250_of_data }, { .compatible = "samsung,exynos4210-pinctrl", - .data = exynos4210_pin_ctrl }, + .data = &exynos4210_of_data }, { .compatible = "samsung,exynos4x12-pinctrl", - .data = exynos4x12_pin_ctrl }, + .data = &exynos4x12_of_data }, { .compatible = "samsung,exynos5250-pinctrl", - .data = exynos5250_pin_ctrl }, + .data = &exynos5250_of_data }, { .compatible = "samsung,exynos5260-pinctrl", - .data = exynos5260_pin_ctrl }, + .data = &exynos5260_of_data }, { .compatible = "samsung,exynos5410-pinctrl", - .data = exynos5410_pin_ctrl }, + .data = &exynos5410_of_data }, { .compatible = "samsung,exynos5420-pinctrl", - .data = exynos5420_pin_ctrl }, + .data = &exynos5420_of_data }, { .compatible = "samsung,s5pv210-pinctrl", - .data = s5pv210_pin_ctrl }, + .data = &s5pv210_of_data }, #endif #ifdef CONFIG_PINCTRL_EXYNOS_ARM64 { .compatible = "samsung,exynos5433-pinctrl", - .data = exynos5433_pin_ctrl }, + .data = &exynos5433_of_data }, { .compatible = "samsung,exynos7-pinctrl", - .data = exynos7_pin_ctrl }, + .data = &exynos7_of_data }, #endif #ifdef CONFIG_PINCTRL_S3C64XX { .compatible = "samsung,s3c64xx-pinctrl", - .data = s3c64xx_pin_ctrl }, + .data = &s3c64xx_of_data }, #endif #ifdef CONFIG_PINCTRL_S3C24XX { .compatible = "samsung,s3c2412-pinctrl", - .data = s3c2412_pin_ctrl }, + .data = &s3c2412_of_data }, { .compatible = "samsung,s3c2416-pinctrl", - .data = s3c2416_pin_ctrl }, + .data = &s3c2416_of_data }, { .compatible = "samsung,s3c2440-pinctrl", - .data = s3c2440_pin_ctrl }, + .data = &s3c2440_of_data }, { .compatible = "samsung,s3c2450-pinctrl", - .data = s3c2450_pin_ctrl }, + .data = &s3c2450_of_data }, #endif {}, }; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h index e204f609823b..f0cda9424dfe 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.h +++ b/drivers/pinctrl/samsung/pinctrl-samsung.h @@ -281,6 +281,16 @@ struct samsung_pinctrl_drv_data { void (*resume)(struct samsung_pinctrl_drv_data *); }; +/** + * struct samsung_pinctrl_of_match_data: OF match device specific configuration data. + * @ctrl: array of pin controller data. + * @num_ctrl: size of array @ctrl. + */ +struct samsung_pinctrl_of_match_data { + const struct samsung_pin_ctrl *ctrl; + unsigned int num_ctrl; +}; + /** * struct samsung_pin_group: represent group of pins of a pinmux function. * @name: name of the pin group, used to lookup the group. @@ -309,20 +319,20 @@ struct samsung_pmx_func { }; /* list of all exported SoC specific data */ -extern const struct samsung_pin_ctrl exynos3250_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos4210_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos4x12_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos5250_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos5260_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos5410_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos5420_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos5433_pin_ctrl[]; -extern const struct samsung_pin_ctrl exynos7_pin_ctrl[]; -extern const struct samsung_pin_ctrl s3c64xx_pin_ctrl[]; -extern const struct samsung_pin_ctrl s3c2412_pin_ctrl[]; -extern const struct samsung_pin_ctrl s3c2416_pin_ctrl[]; -extern const struct samsung_pin_ctrl s3c2440_pin_ctrl[]; -extern const struct samsung_pin_ctrl s3c2450_pin_ctrl[]; -extern const struct samsung_pin_ctrl s5pv210_pin_ctrl[]; +extern const struct samsung_pinctrl_of_match_data exynos3250_of_data; +extern const struct samsung_pinctrl_of_match_data exynos4210_of_data; +extern const struct samsung_pinctrl_of_match_data exynos4x12_of_data; +extern const struct samsung_pinctrl_of_match_data exynos5250_of_data; +extern const struct samsung_pinctrl_of_match_data exynos5260_of_data; +extern const struct samsung_pinctrl_of_match_data exynos5410_of_data; +extern const struct samsung_pinctrl_of_match_data exynos5420_of_data; +extern const struct samsung_pinctrl_of_match_data exynos5433_of_data; +extern const struct samsung_pinctrl_of_match_data exynos7_of_data; +extern const struct samsung_pinctrl_of_match_data s3c64xx_of_data; +extern const struct samsung_pinctrl_of_match_data s3c2412_of_data; +extern const struct samsung_pinctrl_of_match_data s3c2416_of_data; +extern const struct samsung_pinctrl_of_match_data s3c2440_of_data; +extern const struct samsung_pinctrl_of_match_data s3c2450_of_data; +extern const struct samsung_pinctrl_of_match_data s5pv210_of_data; #endif /* __PINCTRL_SAMSUNG_H */ -- cgit v1.2.3 From cbcc607e18422555db569b593608aec26111cb0b Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 8 Mar 2018 12:42:10 +0200 Subject: team: Fix double free in error path The __send_and_alloc_skb() receives a skb ptr as a parameter but in case it fails the skb is not valid: - Send failed and released the skb internally. - Allocation failed. The current code tries to release the skb in case of failure which causes redundant freeing. Fixes: 9b00cf2d1024 ("team: implement multipart netlink messages for options transfers") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a468439969df..56c701b73c12 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2395,7 +2395,7 @@ send_done: if (!nlh) { err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) - goto errout; + return err; goto send_done; } @@ -2681,7 +2681,7 @@ send_done: if (!nlh) { err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) - goto errout; + return err; goto send_done; } -- cgit v1.2.3 From 3d07e0746fbbbe107e611d35719c009742e21b94 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 8 Mar 2018 23:34:35 +1100 Subject: docs: segmentation-offloads.txt: Correct TCP gso_types Pretty minor: just SKB_GSO_TCP -> SKB_GSO_TCPV4 and SKB_GSO_TCP6 -> SKB_GSO_TCPV6. Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index 23a8dd91a9ec..fc0c949e7f9c 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -20,8 +20,8 @@ TCP Segmentation Offload TCP segmentation allows a device to segment a single frame into multiple frames with a data payload size specified in skb_shinfo()->gso_size. -When TCP segmentation requested the bit for either SKB_GSO_TCP or -SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and +When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or +SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and skb_shinfo()->gso_size should be set to a non-zero value. TCP segmentation is dependent on support for the use of partial checksum -- cgit v1.2.3 From 97ef0faf575e03b352553f92c9430cb4c0431436 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 8 Mar 2018 17:17:14 +0200 Subject: xhci: fix endpoint context tracer output Fix incorrent values showed for max Primary stream and Linear stream array (LSA) values in the endpoint context decoder. Fixes: 19a7d0d65c4a ("usb: host: xhci: add Slot and EP Context tracers") Cc: # v4.12+ Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e4d7d3d06a75..d20e57b35d32 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -718,11 +718,12 @@ struct xhci_ep_ctx { /* bits 10:14 are Max Primary Streams */ /* bit 15 is Linear Stream Array */ /* Interval - period between requests to an endpoint - 125u increments. */ -#define EP_INTERVAL(p) (((p) & 0xff) << 16) -#define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff)) -#define CTX_TO_EP_INTERVAL(p) (((p) >> 16) & 0xff) -#define EP_MAXPSTREAMS_MASK (0x1f << 10) -#define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK) +#define EP_INTERVAL(p) (((p) & 0xff) << 16) +#define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff)) +#define CTX_TO_EP_INTERVAL(p) (((p) >> 16) & 0xff) +#define EP_MAXPSTREAMS_MASK (0x1f << 10) +#define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK) +#define CTX_TO_EP_MAXPSTREAMS(p) (((p) & EP_MAXPSTREAMS_MASK) >> 10) /* Endpoint is set up with a Linear Stream Array (vs. Secondary Stream Array) */ #define EP_HAS_LSA (1 << 15) /* hosts with LEC=1 use bits 31:24 as ESIT high bits. */ @@ -2549,21 +2550,22 @@ static inline const char *xhci_decode_ep_context(u32 info, u32 info2, u64 deq, u8 burst; u8 cerr; u8 mult; - u8 lsa; - u8 hid; + + bool lsa; + bool hid; esit = CTX_TO_MAX_ESIT_PAYLOAD_HI(info) << 16 | CTX_TO_MAX_ESIT_PAYLOAD(tx_info); ep_state = info & EP_STATE_MASK; - max_pstr = info & EP_MAXPSTREAMS_MASK; + max_pstr = CTX_TO_EP_MAXPSTREAMS(info); interval = CTX_TO_EP_INTERVAL(info); mult = CTX_TO_EP_MULT(info) + 1; - lsa = info & EP_HAS_LSA; + lsa = !!(info & EP_HAS_LSA); cerr = (info2 & (3 << 1)) >> 1; ep_type = CTX_TO_EP_TYPE(info2); - hid = info2 & (1 << 7); + hid = !!(info2 & (1 << 7)); burst = CTX_TO_MAX_BURST(info2); maxp = MAX_PACKET_DECODED(info2); -- cgit v1.2.3 From a098dc8b03d1c7284ed98d921224fffa15b13ece Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 8 Mar 2018 17:17:15 +0200 Subject: usb: xhci: dbc: Fix lockdep warning The xHCI DbC implementation might enter a deadlock situation because there is no sufficient protection against the shared data between process and softirq contexts. This can lead to the following lockdep warnings. This patch changes to use spin_{,un}lock_irq{save,restore} to avoid potential deadlock. [ 528.248084] ================================ [ 528.252914] WARNING: inconsistent lock state [ 528.257756] 4.15.0-rc1+ #1630 Not tainted [ 528.262305] -------------------------------- [ 528.267145] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 528.273953] ksoftirqd/1/17 [HC0[0]:SC1[1]:HE0:SE0] takes: [ 528.280075] (&(&port->port_lock)->rlock){+.?.}, at: [] dbc_rx_push+0x38/0x1c0 [ 528.290043] {SOFTIRQ-ON-W} state was registered at: [ 528.295570] _raw_spin_lock+0x2f/0x40 [ 528.299818] dbc_write_complete+0x27/0xa0 [ 528.304458] xhci_dbc_giveback+0xd1/0x200 [ 528.309098] xhci_dbc_flush_endpoint_requests+0x50/0x70 [ 528.315116] xhci_dbc_handle_events+0x696/0x7b0 [ 528.320349] process_one_work+0x1ee/0x6e0 [ 528.324988] worker_thread+0x4a/0x430 [ 528.329236] kthread+0x13e/0x170 [ 528.332992] ret_from_fork+0x24/0x30 [ 528.337141] irq event stamp: 2861 [ 528.340897] hardirqs last enabled at (2860): [] tasklet_action+0x6a/0x250 [ 528.350460] hardirqs last disabled at (2861): [] _raw_spin_lock_irq+0xf/0x40 [ 528.360219] softirqs last enabled at (2852): [] __do_softirq+0x3dc/0x4f9 [ 528.369683] softirqs last disabled at (2857): [] run_ksoftirqd+0x1b/0x60 [ 528.379048] [ 528.379048] other info that might help us debug this: [ 528.386443] Possible unsafe locking scenario: [ 528.386443] [ 528.393150] CPU0 [ 528.395917] ---- [ 528.398687] lock(&(&port->port_lock)->rlock); [ 528.403821] [ 528.406786] lock(&(&port->port_lock)->rlock); [ 528.412116] [ 528.412116] *** DEADLOCK *** [ 528.412116] [ 528.418825] no locks held by ksoftirqd/1/17. [ 528.423662] [ 528.423662] stack backtrace: [ 528.428598] CPU: 1 PID: 17 Comm: ksoftirqd/1 Not tainted 4.15.0-rc1+ #1630 [ 528.436387] Call Trace: [ 528.439158] dump_stack+0x5e/0x8e [ 528.442914] print_usage_bug+0x1fc/0x220 [ 528.447357] mark_lock+0x4db/0x5a0 [ 528.451210] __lock_acquire+0x726/0x1130 [ 528.455655] ? __lock_acquire+0x557/0x1130 [ 528.460296] lock_acquire+0xa2/0x200 [ 528.464347] ? dbc_rx_push+0x38/0x1c0 [ 528.468496] _raw_spin_lock_irq+0x35/0x40 [ 528.473038] ? dbc_rx_push+0x38/0x1c0 [ 528.477186] dbc_rx_push+0x38/0x1c0 [ 528.481139] tasklet_action+0x1d2/0x250 [ 528.485483] __do_softirq+0x1dc/0x4f9 [ 528.489630] run_ksoftirqd+0x1b/0x60 [ 528.493682] smpboot_thread_fn+0x179/0x270 [ 528.498324] kthread+0x13e/0x170 [ 528.501981] ? sort_range+0x20/0x20 [ 528.505933] ? kthread_delayed_work_timer_fn+0x80/0x80 [ 528.511755] ret_from_fork+0x24/0x30 Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 20 ++++++++++++-------- drivers/usb/host/xhci-dbgtty.c | 20 ++++++++++++-------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index a1ab8acf39ba..c359bae7b754 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -328,13 +328,14 @@ dbc_ep_do_queue(struct dbc_ep *dep, struct dbc_request *req) int dbc_ep_queue(struct dbc_ep *dep, struct dbc_request *req, gfp_t gfp_flags) { + unsigned long flags; struct xhci_dbc *dbc = dep->dbc; int ret = -ESHUTDOWN; - spin_lock(&dbc->lock); + spin_lock_irqsave(&dbc->lock, flags); if (dbc->state == DS_CONFIGURED) ret = dbc_ep_do_queue(dep, req); - spin_unlock(&dbc->lock); + spin_unlock_irqrestore(&dbc->lock, flags); mod_delayed_work(system_wq, &dbc->event_work, 0); @@ -521,15 +522,16 @@ static void xhci_do_dbc_stop(struct xhci_hcd *xhci) static int xhci_dbc_start(struct xhci_hcd *xhci) { int ret; + unsigned long flags; struct xhci_dbc *dbc = xhci->dbc; WARN_ON(!dbc); pm_runtime_get_sync(xhci_to_hcd(xhci)->self.controller); - spin_lock(&dbc->lock); + spin_lock_irqsave(&dbc->lock, flags); ret = xhci_do_dbc_start(xhci); - spin_unlock(&dbc->lock); + spin_unlock_irqrestore(&dbc->lock, flags); if (ret) { pm_runtime_put(xhci_to_hcd(xhci)->self.controller); @@ -541,6 +543,7 @@ static int xhci_dbc_start(struct xhci_hcd *xhci) static void xhci_dbc_stop(struct xhci_hcd *xhci) { + unsigned long flags; struct xhci_dbc *dbc = xhci->dbc; struct dbc_port *port = &dbc->port; @@ -551,9 +554,9 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci) if (port->registered) xhci_dbc_tty_unregister_device(xhci); - spin_lock(&dbc->lock); + spin_lock_irqsave(&dbc->lock, flags); xhci_do_dbc_stop(xhci); - spin_unlock(&dbc->lock); + spin_unlock_irqrestore(&dbc->lock, flags); pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller); } @@ -779,14 +782,15 @@ static void xhci_dbc_handle_events(struct work_struct *work) int ret; enum evtreturn evtr; struct xhci_dbc *dbc; + unsigned long flags; struct xhci_hcd *xhci; dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work); xhci = dbc->xhci; - spin_lock(&dbc->lock); + spin_lock_irqsave(&dbc->lock, flags); evtr = xhci_dbc_do_handle_events(dbc); - spin_unlock(&dbc->lock); + spin_unlock_irqrestore(&dbc->lock, flags); switch (evtr) { case EVT_GSER: diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 8d47b6fbf973..75f0b92694ba 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -92,21 +92,23 @@ static void dbc_start_rx(struct dbc_port *port) static void dbc_read_complete(struct xhci_hcd *xhci, struct dbc_request *req) { + unsigned long flags; struct xhci_dbc *dbc = xhci->dbc; struct dbc_port *port = &dbc->port; - spin_lock(&port->port_lock); + spin_lock_irqsave(&port->port_lock, flags); list_add_tail(&req->list_pool, &port->read_queue); tasklet_schedule(&port->push); - spin_unlock(&port->port_lock); + spin_unlock_irqrestore(&port->port_lock, flags); } static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req) { + unsigned long flags; struct xhci_dbc *dbc = xhci->dbc; struct dbc_port *port = &dbc->port; - spin_lock(&port->port_lock); + spin_lock_irqsave(&port->port_lock, flags); list_add(&req->list_pool, &port->write_pool); switch (req->status) { case 0: @@ -119,7 +121,7 @@ static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req) req->status); break; } - spin_unlock(&port->port_lock); + spin_unlock_irqrestore(&port->port_lock, flags); } static void xhci_dbc_free_req(struct dbc_ep *dep, struct dbc_request *req) @@ -327,12 +329,13 @@ static void dbc_rx_push(unsigned long _port) { struct dbc_request *req; struct tty_struct *tty; + unsigned long flags; bool do_push = false; bool disconnect = false; struct dbc_port *port = (void *)_port; struct list_head *queue = &port->read_queue; - spin_lock_irq(&port->port_lock); + spin_lock_irqsave(&port->port_lock, flags); tty = port->port.tty; while (!list_empty(queue)) { req = list_first_entry(queue, struct dbc_request, list_pool); @@ -392,16 +395,17 @@ static void dbc_rx_push(unsigned long _port) if (!disconnect) dbc_start_rx(port); - spin_unlock_irq(&port->port_lock); + spin_unlock_irqrestore(&port->port_lock, flags); } static int dbc_port_activate(struct tty_port *_port, struct tty_struct *tty) { + unsigned long flags; struct dbc_port *port = container_of(_port, struct dbc_port, port); - spin_lock_irq(&port->port_lock); + spin_lock_irqsave(&port->port_lock, flags); dbc_start_rx(port); - spin_unlock_irq(&port->port_lock); + spin_unlock_irqrestore(&port->port_lock, flags); return 0; } -- cgit v1.2.3 From de3d50aadd40bf68614db9fd157b275ce9c2d467 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:09 -0800 Subject: hv_netvsc: fix filter flags The recent change to not always enable all multicast and broadcast was broken; meant to set filter, not change flags. Fixes: 009f766ca238 ("hv_netvsc: filter multicast/broadcast") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8927c483c217..411a3aee39b2 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -861,9 +861,9 @@ static void rndis_set_multicast(struct work_struct *w) filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { if (flags & IFF_ALLMULTI) - flags |= NDIS_PACKET_TYPE_ALL_MULTICAST; + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; if (flags & IFF_BROADCAST) - flags |= NDIS_PACKET_TYPE_BROADCAST; + filter |= NDIS_PACKET_TYPE_BROADCAST; } rndis_filter_set_packet_filter(rdev, filter); -- cgit v1.2.3 From 7eeb4a6ee4820c4e84895d252079a797f27fc80d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:10 -0800 Subject: hv_netvsc: avoid repeated updates of packet filter The netvsc driver can get repeated calls to netvsc_rx_mode during network setup; each of these calls ends up scheduling the lower layers to update tha packet filter. This update requires an request/response to the host. So avoid doing this if we already know that the correct packet filter value is set. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/rndis_filter.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 0db3bd1ea06f..cd538d5a7986 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -173,6 +173,7 @@ struct rndis_device { struct list_head req_list; struct work_struct mcast_work; + u32 filter; bool link_state; /* 0 - link up, 1 - link down */ diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 411a3aee39b2..00ec80c23fe5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -825,13 +825,15 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev, struct rndis_set_request *set; int ret; + if (dev->filter == new_filter) + return 0; + request = get_rndis_request(dev, RNDIS_MSG_SET, RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(u32)); if (!request) return -ENOMEM; - /* Setup the rndis set */ set = &request->request_msg.msg.set_req; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; @@ -842,8 +844,10 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev, &new_filter, sizeof(u32)); ret = rndis_filter_send_request(dev, request); - if (ret == 0) + if (ret == 0) { wait_for_completion(&request->wait_event); + dev->filter = new_filter; + } put_rndis_request(dev, request); -- cgit v1.2.3 From 35a57b7fef136fa3d5b735ba773f191b95110fa0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:11 -0800 Subject: hv_netvsc: fix locking for rx_mode The rx_mode operation handler is different than other callbacks in that is not always called with rtnl held. Therefore use RCU to ensure that references are valid. Fixes: bee9d41b37ea ("hv_netvsc: propagate rx filters to VF") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index cdb78eefab67..48d9fa7a66c2 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -89,15 +89,20 @@ static void netvsc_change_rx_flags(struct net_device *net, int change) static void netvsc_set_rx_mode(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); + struct net_device *vf_netdev; + struct netvsc_device *nvdev; + rcu_read_lock(); + vf_netdev = rcu_dereference(ndev_ctx->vf_netdev); if (vf_netdev) { dev_uc_sync(vf_netdev, net); dev_mc_sync(vf_netdev, net); } - rndis_filter_update(nvdev); + nvdev = rcu_dereference(ndev_ctx->nvdev); + if (nvdev) + rndis_filter_update(nvdev); + rcu_read_unlock(); } static int netvsc_open(struct net_device *net) -- cgit v1.2.3 From b0dee7910317f41f398838992516af6a3b981d86 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:12 -0800 Subject: hv_netvsc: fix locking during VF setup The dev_uc/mc_sync calls need to have the device address list locked. This was spotted by running with lockdep enabled. Fixes: bee9d41b37ea ("hv_netvsc: propagate rx filters to VF") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 48d9fa7a66c2..faea0be18924 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1851,8 +1851,12 @@ static void __netvsc_vf_setup(struct net_device *ndev, /* set multicast etc flags on VF */ dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE); + + /* sync address list from ndev to VF */ + netif_addr_lock_bh(ndev); dev_uc_sync(vf_netdev, ndev); dev_mc_sync(vf_netdev, ndev); + netif_addr_unlock_bh(ndev); if (netif_running(ndev)) { ret = dev_open(vf_netdev); -- cgit v1.2.3 From d9ee65539d3eabd9ade46cca1780e3309ad0f907 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 6 Mar 2018 12:47:08 -0500 Subject: NFS: Fix an incorrect type in struct nfs_direct_req The start offset needs to be of type loff_t. Fixed: 5fadeb47dcc5c ("nfs: count DIO good bytes correctly with mirroring") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8c10b0562e75..621c517b325c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -86,10 +86,10 @@ struct nfs_direct_req { struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; int mirror_count; + loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ bytes_left, /* bytes left to be sent */ - io_start, /* start of IO */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ -- cgit v1.2.3 From 9c6376ebddad585da4238532dd6d90ae23ffee67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Mar 2018 14:49:06 -0500 Subject: pNFS: Prevent the layout header refcount going to zero in pnfs_roc() Ensure that we hold a reference to the layout header when processing the pNFS return-on-close so that the refcount value does not inadvertently go to zero. Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.10+ Tested-by: Tigran Mkrtchyan --- fs/nfs/pnfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c13e826614b5..ee723aa153a3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { - struct inode *inode = lo->plh_inode; + struct inode *inode; + if (!lo) + return; + inode = lo->plh_inode; pnfs_layoutreturn_before_put_layout_hdr(lo); if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { @@ -1241,10 +1244,12 @@ retry: spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo || !pnfs_layout_is_valid(lo) || - test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + lo = NULL; goto out_noroc; + } + pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { - pnfs_get_layout_hdr(lo); spin_unlock(&ino->i_lock); wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); @@ -1312,10 +1317,12 @@ out_noroc: struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; if (ld->prepare_layoutreturn) ld->prepare_layoutreturn(args); + pnfs_put_layout_hdr(lo); return true; } if (layoutreturn) pnfs_send_layoutreturn(lo, &stateid, iomode, true); + pnfs_put_layout_hdr(lo); return false; } -- cgit v1.2.3 From c4f24df942a181699c5bab01b8e5e82b925f77f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Mar 2018 15:22:31 -0500 Subject: NFS: Fix unstable write completion We do want to respect the FLUSH_SYNC argument to nfs_commit_inode() to ensure that all outstanding COMMIT requests to the inode in question are complete. Currently we may exit early from both nfs_commit_inode() and nfs_write_inode() even if there are COMMIT requests in flight, or unstable writes on the commit list. In order to get the right semantics w.r.t. sync_inode(), we don't need to have nfs_commit_inode() reset the inode dirty flags when called from nfs_wb_page() and/or nfs_wb_all(). We just need to ensure that nfs_write_inode() leaves them in the right state if there are outstanding commits, or stable pages. Reported-by: Scott Mayhew Fixes: dc4fd9ab01ab ("nfs: don't wait on commit in nfs_commit_inode()...") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 83 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7428a669d7a7..e7d8ceae8f26 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head, return status; } -int nfs_commit_inode(struct inode *inode, int how) +static int __nfs_commit_inode(struct inode *inode, int how, + struct writeback_control *wbc) { LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; - int error = 0; - int res; + int ret, nscan; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); - res = nfs_scan_commit(inode, &head, &cinfo); - if (res) - error = nfs_generic_commit_list(inode, &head, how, &cinfo); + for (;;) { + ret = nscan = nfs_scan_commit(inode, &head, &cinfo); + if (ret <= 0) + break; + ret = nfs_generic_commit_list(inode, &head, how, &cinfo); + if (ret < 0) + break; + ret = 0; + if (wbc && wbc->sync_mode == WB_SYNC_NONE) { + if (nscan < wbc->nr_to_write) + wbc->nr_to_write -= nscan; + else + wbc->nr_to_write = 0; + } + if (nscan < INT_MAX) + break; + cond_resched(); + } nfs_commit_end(cinfo.mds); - if (res == 0) - return res; - if (error < 0) - goto out_error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_commit(cinfo.mds); - if (error < 0) - return error; - return res; -out_error: - res = error; - /* Note: If we exit without ensuring that the commit is complete, - * we must mark the inode as dirty. Otherwise, future calls to - * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure - * that the data is on the disk. - */ -out_mark_dirty: - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return res; + if (ret || !may_wait) + return ret; + return wait_on_commit(cinfo.mds); +} + +int nfs_commit_inode(struct inode *inode, int how) +{ + return __nfs_commit_inode(inode, how, NULL); } EXPORT_SYMBOL_GPL(nfs_commit_inode); @@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int flags = FLUSH_SYNC; int ret = 0; - /* no commits means nothing needs to be done */ - if (!atomic_long_read(&nfsi->commit_info.ncommit)) - return ret; - if (wbc->sync_mode == WB_SYNC_NONE) { + /* no commits means nothing needs to be done */ + if (!atomic_long_read(&nfsi->commit_info.ncommit)) + goto check_requests_outstanding; + /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ @@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) flags = 0; } - ret = nfs_commit_inode(inode, flags); - if (ret >= 0) { - if (wbc->sync_mode == WB_SYNC_NONE) { - if (ret < wbc->nr_to_write) - wbc->nr_to_write -= ret; - else - wbc->nr_to_write = 0; - } - return 0; - } + ret = __nfs_commit_inode(inode, flags, wbc); + if (!ret) { + if (flags & FLUSH_SYNC) + return 0; + } else if (atomic_long_read(&nfsi->commit_info.ncommit)) + goto out_mark_dirty; + +check_requests_outstanding: + if (!atomic_read(&nfsi->commit_info.rpcs_out)) + return ret; out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; -- cgit v1.2.3 From 5126a504b63d82785eaece3a9c30c660b313785a Mon Sep 17 00:00:00 2001 From: Teijo Kinnunen Date: Thu, 1 Mar 2018 19:34:29 +0200 Subject: USB: storage: Add JMicron bridge 152d:2567 to unusual_devs.h This USB-SATA controller seems to be similar with JMicron bridge 152d:2566 already on the list. Adding it here fixes "Invalid field in cdb" errors. Signed-off-by: Teijo Kinnunen Cc: stable@vger.kernel.org Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 264af199aec8..747d3a9596d9 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2118,6 +2118,13 @@ UNUSUAL_DEV( 0x152d, 0x2566, 0x0114, 0x0114, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported by Teijo Kinnunen */ +UNUSUAL_DEV( 0x152d, 0x2567, 0x0117, 0x0117, + "JMicron", + "USB to ATA/ATAPI Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA ), + /* Reported-by George Cherian */ UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999, "JMicron", -- cgit v1.2.3 From 015dbeb2282030bf56762e21d25f09422edfd750 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Feb 2018 17:15:20 +0900 Subject: usb: host: xhci-rcar: add support for r8a77965 This patch adds support for r8a77965 (R-Car M3-N). Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Reviewed-by: Rob Herring Cc: stable Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-xhci.txt | 1 + drivers/usb/host/xhci-rcar.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt index e2ea59bbca93..1651483a7048 100644 --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt @@ -13,6 +13,7 @@ Required properties: - "renesas,xhci-r8a7793" for r8a7793 SoC - "renesas,xhci-r8a7795" for r8a7795 SoC - "renesas,xhci-r8a7796" for r8a7796 SoC + - "renesas,xhci-r8a77965" for r8a77965 SoC - "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 or RZ/G1 compatible device - "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 compatible device diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index f0b559660007..f33ffc2bc4ed 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -83,6 +83,10 @@ static const struct soc_device_attribute rcar_quirks_match[] = { .soc_id = "r8a7796", .data = (void *)RCAR_XHCI_FIRMWARE_V3, }, + { + .soc_id = "r8a77965", + .data = (void *)RCAR_XHCI_FIRMWARE_V3, + }, { /* sentinel */ }, }; -- cgit v1.2.3 From 3f553b308bb004eb730da8e00a28150c157c7724 Mon Sep 17 00:00:00 2001 From: Leon Yu Date: Tue, 6 Mar 2018 23:16:24 +0800 Subject: module: propagate error in modules_open() otherwise kernel can oops later in seq_release() due to dereferencing null file->private_data which is only set if seq_open() succeeds. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: seq_release+0xc/0x30 Call Trace: close_pdeo+0x37/0xd0 proc_reg_release+0x5d/0x60 __fput+0x9d/0x1d0 ____fput+0x9/0x10 task_work_run+0x75/0x90 do_exit+0x252/0xa00 do_group_exit+0x36/0xb0 SyS_exit_group+0xf/0x10 Fixes: 516fb7f2e73d ("/proc/module: use the same logic as /proc/kallsyms for address exposure") Cc: Jessica Yu Cc: Linus Torvalds Cc: stable@vger.kernel.org # 4.15+ Signed-off-by: Leon Yu Signed-off-by: Jessica Yu --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index ad2d420024f6..e42764acedb4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4228,7 +4228,7 @@ static int modules_open(struct inode *inode, struct file *file) m->private = kallsyms_show_value() ? NULL : (void *)8ul; } - return 0; + return err; } static const struct file_operations proc_modules_operations = { -- cgit v1.2.3 From 601ff7edc748213fc6d4d7c1ded6467edbd1f1c2 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 31 Aug 2017 15:56:53 +0100 Subject: MAINTAINERS: Update Tegra IOMMU maintainer The email address for the Tegra IOMMU maintainer, Hiroshi, is no longer valid. Hiroshi has not been maintaining the Tegra IOMMU driver for some time now and so remove Hiroshi as the maintainer and add Thierry Reding instead. Also add the mailing list information for this driver as well. Signed-off-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Thierry Reding --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260e36b7..0666381e7224 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13619,7 +13619,8 @@ S: Supported F: drivers/i2c/busses/i2c-tegra.c TEGRA IOMMU DRIVERS -M: Hiroshi Doyu +M: Thierry Reding +L: linux-tegra@vger.kernel.org S: Supported F: drivers/iommu/tegra* -- cgit v1.2.3 From c4dc56be7e26040bfc60ce73425353516a356955 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 1 Mar 2018 11:34:48 +0100 Subject: ARM: davinci: fix the GPIO lookup for omapl138-hawk The GPIO chip is called davinci_gpio.0 in legacy mode. Fix it, so that mmc can correctly lookup the wp and cp gpios. Note that it is the gpio-davinci driver that sets the gpiochip label to davinci_gpio.0. Fixes: c69f43fb4f26 ("ARM: davinci: hawk: use gpio descriptor for mmc pins") Signed-off-by: Bartosz Golaszewski [nsekhar@ti.com: add a note on where the chip label is set] Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-omapl138-hawk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index a3e78074be70..62eb7d668890 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -127,8 +127,8 @@ static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/ - GPIO_LOOKUP("davinci_gpio.1", 28, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.1", 29, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW), }, }; -- cgit v1.2.3 From b24881e0b0b69155b092c525b7fded258d78a46d Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 26 Feb 2018 10:40:18 +0800 Subject: drm/i915/gvt: Add runtime_pm_get/put into gvt_switch_mmio If user continuously create vgpu, boot guest, shoutdown guest and destroy vgpu from remote, the following calltrace exists in dmesg sometimes: [ 6412.954721] RPM wakelock ref not held during HW access [ 6412.954795] WARNING: CPU: 7 PID: 11941 at linux/drivers/gpu/drm/i915/intel_drv.h:1800 intel_uncore_forcewake_get.part.7+0x96/0xa0 [i915] [ 6412.954915] Call Trace: [ 6412.954951] intel_uncore_forcewake_get+0x18/0x20 [i915] [ 6412.954989] intel_gvt_switch_mmio+0x8e/0x770 [i915] [ 6412.954996] ? __slab_free+0x14d/0x2c0 [ 6412.955001] ? __slab_free+0x14d/0x2c0 [ 6412.955006] ? __slab_free+0x14d/0x2c0 [ 6412.955041] intel_vgpu_stop_schedule+0x92/0xd0 [i915] [ 6412.955073] intel_gvt_deactivate_vgpu+0x48/0x60 [i915] [ 6412.955078] __intel_vgpu_release+0x55/0x260 [kvmgt] when this happens, gvt_switch_mmio is called at vgpu destroy, host i915 is idle and doesn't hold RPM wakelock, igd is in powersave mode, but gvt_switch_mmio require igd power on to access register, so intel_runtime_pm_get should be added to make sure igd power on before gvt_switch_mmio. v2: Move runtime_pm_get/put into gvt_switch_mmio.(Zhenyu) Signed-off-by: Xiong Zhang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 256f1bb522b7..152df3d0291e 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, * performace for batch mmio read/write, so we need * handle forcewake mannually. */ + intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); switch_mmio(pre, next, ring_id); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_runtime_pm_put(dev_priv); } /** -- cgit v1.2.3 From fa3dd623e559e8e7004179f9594b090318df0d05 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 2 Mar 2018 10:00:25 +0800 Subject: drm/i915/gvt: keep oa config in shadow ctx When populating shadow ctx from guest, we should handle oa related registers in hw ctx, so that they will not be overlapped by guest oa configs. This patch made it possible to capture oa data from host for both host and guests. Signed-off-by: Min He Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 50 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 4 +++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b55b3580ca1d..8caf72c1e794 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer( pdp_pair[i].val = pdp[7 - i]; } +/* + * when populating shadow ctx from guest, we should not overrride oa related + * registers, so that they will not be overlapped by guest oa configs. Thus + * made it possible to capture oa data from host for both host and guests. + */ +static void sr_oa_regs(struct intel_vgpu_workload *workload, + u32 *reg_state, bool save) +{ + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + int i = 0; + u32 flex_mmio[] = { + i915_mmio_reg_offset(EU_PERF_CNTL0), + i915_mmio_reg_offset(EU_PERF_CNTL1), + i915_mmio_reg_offset(EU_PERF_CNTL2), + i915_mmio_reg_offset(EU_PERF_CNTL3), + i915_mmio_reg_offset(EU_PERF_CNTL4), + i915_mmio_reg_offset(EU_PERF_CNTL5), + i915_mmio_reg_offset(EU_PERF_CNTL6), + }; + + if (!workload || !reg_state || workload->ring_id != RCS) + return; + + if (save) { + workload->oactxctrl = reg_state[ctx_oactxctrl + 1]; + + for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + + workload->flex_mmio[i] = reg_state[state_offset + 1]; + } + } else { + reg_state[ctx_oactxctrl] = + i915_mmio_reg_offset(GEN8_OACTXCONTROL); + reg_state[ctx_oactxctrl + 1] = workload->oactxctrl; + + for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + u32 mmio = flex_mmio[i]; + + reg_state[state_offset] = mmio; + reg_state[state_offset + 1] = workload->flex_mmio[i]; + } + } +} + static int populate_shadow_context(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); shadow_ring_context = kmap(page); + sr_oa_regs(workload, (u32 *)shadow_ring_context, true); #define COPY_REG(name) \ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) @@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + sr_oa_regs(workload, (u32 *)shadow_ring_context, false); kunmap(page); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index ff175a98b19e..2603336b7c6d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -110,6 +110,10 @@ struct intel_vgpu_workload { /* shadow batch buffer */ struct list_head shadow_bb; struct intel_shadow_wa_ctx wa_ctx; + + /* oa registers */ + u32 oactxctrl; + u32 flex_mmio[7]; }; struct intel_vgpu_shadow_bb { -- cgit v1.2.3 From a5f596830e27e15f7a0ecd6be55e433d776986d8 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Fri, 9 Mar 2018 00:21:14 -0600 Subject: usb: usbmon: Read text within supplied buffer size This change fixes buffer overflows and silent data corruption with the usbmon device driver text file read operations. Signed-off-by: Fredrik Noring Signed-off-by: Pete Zaitcev Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_text.c | 126 ++++++++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index f5e1bb5e5217..984f7e12a6a5 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -85,6 +85,8 @@ struct mon_reader_text { wait_queue_head_t wait; int printf_size; + size_t printf_offset; + size_t printf_togo; char *printf_buf; struct mutex printf_lock; @@ -376,75 +378,103 @@ err_alloc: return rc; } -/* - * For simplicity, we read one record in one system call and throw out - * what does not fit. This means that the following does not work: - * dd if=/dbg/usbmon/0t bs=10 - * Also, we do not allow seeks and do not bother advancing the offset. - */ +static ssize_t mon_text_copy_to_user(struct mon_reader_text *rp, + char __user * const buf, const size_t nbytes) +{ + const size_t togo = min(nbytes, rp->printf_togo); + + if (copy_to_user(buf, &rp->printf_buf[rp->printf_offset], togo)) + return -EFAULT; + rp->printf_togo -= togo; + rp->printf_offset += togo; + return togo; +} + +/* ppos is not advanced since the llseek operation is not permitted. */ static ssize_t mon_text_read_t(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) + size_t nbytes, loff_t *ppos) { struct mon_reader_text *rp = file->private_data; struct mon_event_text *ep; struct mon_text_ptr ptr; + ssize_t ret; - ep = mon_text_read_wait(rp, file); - if (IS_ERR(ep)) - return PTR_ERR(ep); mutex_lock(&rp->printf_lock); - ptr.cnt = 0; - ptr.pbuf = rp->printf_buf; - ptr.limit = rp->printf_size; - - mon_text_read_head_t(rp, &ptr, ep); - mon_text_read_statset(rp, &ptr, ep); - ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt, - " %d", ep->length); - mon_text_read_data(rp, &ptr, ep); - - if (copy_to_user(buf, rp->printf_buf, ptr.cnt)) - ptr.cnt = -EFAULT; + + if (rp->printf_togo == 0) { + + ep = mon_text_read_wait(rp, file); + if (IS_ERR(ep)) { + mutex_unlock(&rp->printf_lock); + return PTR_ERR(ep); + } + ptr.cnt = 0; + ptr.pbuf = rp->printf_buf; + ptr.limit = rp->printf_size; + + mon_text_read_head_t(rp, &ptr, ep); + mon_text_read_statset(rp, &ptr, ep); + ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt, + " %d", ep->length); + mon_text_read_data(rp, &ptr, ep); + + rp->printf_togo = ptr.cnt; + rp->printf_offset = 0; + + kmem_cache_free(rp->e_slab, ep); + } + + ret = mon_text_copy_to_user(rp, buf, nbytes); mutex_unlock(&rp->printf_lock); - kmem_cache_free(rp->e_slab, ep); - return ptr.cnt; + return ret; } +/* ppos is not advanced since the llseek operation is not permitted. */ static ssize_t mon_text_read_u(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) + size_t nbytes, loff_t *ppos) { struct mon_reader_text *rp = file->private_data; struct mon_event_text *ep; struct mon_text_ptr ptr; + ssize_t ret; - ep = mon_text_read_wait(rp, file); - if (IS_ERR(ep)) - return PTR_ERR(ep); mutex_lock(&rp->printf_lock); - ptr.cnt = 0; - ptr.pbuf = rp->printf_buf; - ptr.limit = rp->printf_size; - mon_text_read_head_u(rp, &ptr, ep); - if (ep->type == 'E') { - mon_text_read_statset(rp, &ptr, ep); - } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) { - mon_text_read_isostat(rp, &ptr, ep); - mon_text_read_isodesc(rp, &ptr, ep); - } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) { - mon_text_read_intstat(rp, &ptr, ep); - } else { - mon_text_read_statset(rp, &ptr, ep); + if (rp->printf_togo == 0) { + + ep = mon_text_read_wait(rp, file); + if (IS_ERR(ep)) { + mutex_unlock(&rp->printf_lock); + return PTR_ERR(ep); + } + ptr.cnt = 0; + ptr.pbuf = rp->printf_buf; + ptr.limit = rp->printf_size; + + mon_text_read_head_u(rp, &ptr, ep); + if (ep->type == 'E') { + mon_text_read_statset(rp, &ptr, ep); + } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) { + mon_text_read_isostat(rp, &ptr, ep); + mon_text_read_isodesc(rp, &ptr, ep); + } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) { + mon_text_read_intstat(rp, &ptr, ep); + } else { + mon_text_read_statset(rp, &ptr, ep); + } + ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt, + " %d", ep->length); + mon_text_read_data(rp, &ptr, ep); + + rp->printf_togo = ptr.cnt; + rp->printf_offset = 0; + + kmem_cache_free(rp->e_slab, ep); } - ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt, - " %d", ep->length); - mon_text_read_data(rp, &ptr, ep); - if (copy_to_user(buf, rp->printf_buf, ptr.cnt)) - ptr.cnt = -EFAULT; + ret = mon_text_copy_to_user(rp, buf, nbytes); mutex_unlock(&rp->printf_lock); - kmem_cache_free(rp->e_slab, ep); - return ptr.cnt; + return ret; } static struct mon_event_text *mon_text_read_wait(struct mon_reader_text *rp, -- cgit v1.2.3 From a37d48e32303d535bdfd554c57952ce31f428b3a Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 9 Mar 2018 21:13:02 +0530 Subject: ASoC: amd: 16bit resolution support for i2s sp instance Moved 16bit resolution condition check for stoney platform to acp_hw_params.Depending upon substream required register value need to be programmed rather than enabling 16bit resolution support all time in acp init. Signed-off-by: Vijendar Mukunda Signed-off-by: Mark Brown --- sound/soc/amd/acp-pcm-dma.c | 16 +++++++++------- sound/soc/amd/acp.h | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index c33a512283a4..9fb356db3ab2 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -579,13 +579,6 @@ static int acp_init(void __iomem *acp_mmio, u32 asic_type) for (bank = 1; bank < 48; bank++) acp_set_sram_bank_state(acp_mmio, bank, false); } - - /* Stoney supports 16bit resolution */ - if (asic_type == CHIP_STONEY) { - val = acp_reg_read(acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN); - val |= 0x03; - acp_reg_write(val, acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN); - } return 0; } @@ -774,6 +767,7 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream, { int status; uint64_t size; + u32 val = 0; struct page *pg; struct snd_pcm_runtime *runtime; struct audio_substream_data *rtd; @@ -786,6 +780,14 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream, if (WARN_ON(!rtd)) return -EINVAL; + if (adata->asic_type == CHIP_STONEY) { + val = acp_reg_read(adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + val |= ACP_I2S_SP_16BIT_RESOLUTION_EN; + else + val |= ACP_I2S_MIC_16BIT_RESOLUTION_EN; + acp_reg_write(val, adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN); + } size = params_buffer_bytes(params); status = snd_pcm_lib_malloc_pages(substream, size); if (status < 0) diff --git a/sound/soc/amd/acp.h b/sound/soc/amd/acp.h index ecb458935d1e..9293f179f272 100644 --- a/sound/soc/amd/acp.h +++ b/sound/soc/amd/acp.h @@ -70,6 +70,8 @@ #define CAPTURE_END_DMA_DESCR_CH15 7 #define mmACP_I2S_16BIT_RESOLUTION_EN 0x5209 +#define ACP_I2S_MIC_16BIT_RESOLUTION_EN 0x01 +#define ACP_I2S_SP_16BIT_RESOLUTION_EN 0x02 enum acp_dma_priority_level { /* 0x0 Specifies the DMA channel is given normal priority */ ACP_DMA_PRIORITY_LEVEL_NORMAL = 0x0, -- cgit v1.2.3 From 9f62c15f28b0d1d746734666d88a79f08ba1e43e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 8 Mar 2018 17:00:02 +0100 Subject: ipv6: fix access to non-linear packet in ndisc_fill_redirect_hdr_option() Fix the following slab-out-of-bounds kasan report in ndisc_fill_redirect_hdr_option when the incoming ipv6 packet is not linear and the accessed data are not in the linear data region of orig_skb. [ 1503.122508] ================================================================== [ 1503.122832] BUG: KASAN: slab-out-of-bounds in ndisc_send_redirect+0x94e/0x990 [ 1503.123036] Read of size 1184 at addr ffff8800298ab6b0 by task netperf/1932 [ 1503.123220] CPU: 0 PID: 1932 Comm: netperf Not tainted 4.16.0-rc2+ #124 [ 1503.123347] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-2.fc27 04/01/2014 [ 1503.123527] Call Trace: [ 1503.123579] [ 1503.123638] print_address_description+0x6e/0x280 [ 1503.123849] kasan_report+0x233/0x350 [ 1503.123946] memcpy+0x1f/0x50 [ 1503.124037] ndisc_send_redirect+0x94e/0x990 [ 1503.125150] ip6_forward+0x1242/0x13b0 [...] [ 1503.153890] Allocated by task 1932: [ 1503.153982] kasan_kmalloc+0x9f/0xd0 [ 1503.154074] __kmalloc_track_caller+0xb5/0x160 [ 1503.154198] __kmalloc_reserve.isra.41+0x24/0x70 [ 1503.154324] __alloc_skb+0x130/0x3e0 [ 1503.154415] sctp_packet_transmit+0x21a/0x1810 [ 1503.154533] sctp_outq_flush+0xc14/0x1db0 [ 1503.154624] sctp_do_sm+0x34e/0x2740 [ 1503.154715] sctp_primitive_SEND+0x57/0x70 [ 1503.154807] sctp_sendmsg+0xaa6/0x1b10 [ 1503.154897] sock_sendmsg+0x68/0x80 [ 1503.154987] ___sys_sendmsg+0x431/0x4b0 [ 1503.155078] __sys_sendmsg+0xa4/0x130 [ 1503.155168] do_syscall_64+0x171/0x3f0 [ 1503.155259] entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ 1503.155436] Freed by task 1932: [ 1503.155527] __kasan_slab_free+0x134/0x180 [ 1503.155618] kfree+0xbc/0x180 [ 1503.155709] skb_release_data+0x27f/0x2c0 [ 1503.155800] consume_skb+0x94/0xe0 [ 1503.155889] sctp_chunk_put+0x1aa/0x1f0 [ 1503.155979] sctp_inq_pop+0x2f8/0x6e0 [ 1503.156070] sctp_assoc_bh_rcv+0x6a/0x230 [ 1503.156164] sctp_inq_push+0x117/0x150 [ 1503.156255] sctp_backlog_rcv+0xdf/0x4a0 [ 1503.156346] __release_sock+0x142/0x250 [ 1503.156436] release_sock+0x80/0x180 [ 1503.156526] sctp_sendmsg+0xbb0/0x1b10 [ 1503.156617] sock_sendmsg+0x68/0x80 [ 1503.156708] ___sys_sendmsg+0x431/0x4b0 [ 1503.156799] __sys_sendmsg+0xa4/0x130 [ 1503.156889] do_syscall_64+0x171/0x3f0 [ 1503.156980] entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ 1503.157158] The buggy address belongs to the object at ffff8800298ab600 which belongs to the cache kmalloc-1024 of size 1024 [ 1503.157444] The buggy address is located 176 bytes inside of 1024-byte region [ffff8800298ab600, ffff8800298aba00) [ 1503.157702] The buggy address belongs to the page: [ 1503.157820] page:ffffea0000a62a00 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0 [ 1503.158053] flags: 0x4000000000008100(slab|head) [ 1503.158171] raw: 4000000000008100 0000000000000000 0000000000000000 00000001800e000e [ 1503.158350] raw: dead000000000100 dead000000000200 ffff880036002600 0000000000000000 [ 1503.158523] page dumped because: kasan: bad access detected [ 1503.158698] Memory state around the buggy address: [ 1503.158816] ffff8800298ab900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1503.158988] ffff8800298ab980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1503.159165] >ffff8800298aba00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 1503.159338] ^ [ 1503.159436] ffff8800298aba80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1503.159610] ffff8800298abb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1503.159785] ================================================================== [ 1503.159964] Disabling lock debugging due to kernel taint The test scenario to trigger the issue consists of 4 devices: - H0: data sender, connected to LAN0 - H1: data receiver, connected to LAN1 - GW0 and GW1: routers between LAN0 and LAN1. Both of them have an ethernet connection on LAN0 and LAN1 On H{0,1} set GW0 as default gateway while on GW0 set GW1 as next hop for data from LAN0 to LAN1. Moreover create an ip6ip6 tunnel between H0 and H1 and send 3 concurrent data streams (TCP/UDP/SCTP) from H0 to H1 through ip6ip6 tunnel (send buffer size is set to 16K). While data streams are active flush the route cache on HA multiple times. I have not been able to identify a given commit that introduced the issue since, using the reproducer described above, the kasan report has been triggered from 4.14 and I have not gone back further. Reported-by: Jianlin Shi Reviewed-by: Stefano Brivio Reviewed-by: Eric Dumazet Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f61a5b613b52..ba5e04c6ae17 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, *(opt++) = (rd_len >> 3); opt += 6; - memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); + skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, + rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) -- cgit v1.2.3 From ca0edb131bdf1e6beaeb2b8289fd6b374b74147d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2018 08:51:03 -0800 Subject: ieee802154: 6lowpan: fix possible NULL deref in lowpan_device_event() A tun device type can trivially be set to arbitrary value using TUNSETLINK ioctl(). Therefore, lowpan_device_event() must really check that ieee802154_ptr is not NULL. Fixes: 2c88b5283f60d ("ieee802154: 6lowpan: remove check on null") Signed-off-by: Eric Dumazet Cc: Alexander Aring Cc: Stefan Schmidt Reported-by: syzbot Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- net/ieee802154/6lowpan/core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 974765b7d92a..e9f0489e4229 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -206,9 +206,13 @@ static inline void lowpan_netlink_fini(void) static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *wdev = netdev_notifier_info_to_dev(ptr); + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct wpan_dev *wpan_dev; - if (wdev->type != ARPHRD_IEEE802154) + if (ndev->type != ARPHRD_IEEE802154) + return NOTIFY_DONE; + wpan_dev = ndev->ieee802154_ptr; + if (!wpan_dev) return NOTIFY_DONE; switch (event) { @@ -217,8 +221,8 @@ static int lowpan_device_event(struct notifier_block *unused, * also delete possible lowpan interfaces which belongs * to the wpan interface. */ - if (wdev->ieee802154_ptr->lowpan_dev) - lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); + if (wpan_dev->lowpan_dev) + lowpan_dellink(wpan_dev->lowpan_dev, NULL); break; default: return NOTIFY_DONE; -- cgit v1.2.3 From 1dd27cde30e85774c77349c804229431616d594a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 9 Mar 2018 14:06:09 +1100 Subject: net: use skb_is_gso_sctp() instead of open-coding As well as the basic conversion, I noticed that a lot of the SCTP code checks gso_type without first checking skb_is_gso() so I have added that where appropriate. Also, document the helper. Cc: Daniel Borkmann Cc: Marcelo Ricardo Leitner Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 5 ++++- net/core/skbuff.c | 2 +- net/sched/act_csum.c | 2 +- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/offload.c | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index fc0c949e7f9c..aca542ec125c 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -155,7 +155,10 @@ Therefore, any code in the core networking stack must be aware of the possibility that gso_size will be GSO_BY_FRAGS and handle that case appropriately. -There are a couple of helpers to make this easier: +There are some helpers to make this easier: + + - skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if + an skb is an SCTP GSO skb. - For size checks, the skb_gso_validate_*_len family of helpers correctly considers GSO_BY_FRAGS. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0bb0d8877954..baf990528943 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4904,7 +4904,7 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen += inner_tcp_hdrlen(skb); } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { thlen = tcp_hdrlen(skb); - } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) { + } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); } /* UFO sets gso_size to the size of the fragmentation diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b7ba9b06b147..24b2e8e681cf 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, { struct sctphdr *sctph; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return 1; sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..b381d78548ac 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb) int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); + bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && - skb_linearize(skb)) || + if ((!is_gso && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; @@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb_csum_unnecessary(skb)) __skb_decr_checksum_unnecessary(skb); else if (!sctp_checksum_disable && - !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + !is_gso && sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb->csum_valid = 1; @@ -1218,7 +1218,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * issue as packets hitting this are mostly INIT or INIT-ACK and * those cannot be on GSO-style anyway. */ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return NULL; ch = (struct sctp_chunkhdr *)skb->data; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 48392552ee7c..23ebc5318edc 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -170,7 +170,7 @@ next_chunk: chunk = list_entry(entry, struct sctp_chunk, list); - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { + if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) { /* GSO-marked skbs but without frags, handle * them normally */ diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 35bc7106d182..123e9f2dc226 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + if (!skb_is_gso_sctp(skb)) goto out; sh = sctp_hdr(skb); -- cgit v1.2.3 From c535d632aecc6359d072374675a7787cbe71773b Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 27 Feb 2018 18:04:25 +0100 Subject: mailmap: Update email address for Gregory CLEMENT As now Free Electrons is Bootlin. Signed-off-by: Gregory CLEMENT --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e18cab73e209..a2ce89a456c2 100644 --- a/.mailmap +++ b/.mailmap @@ -62,6 +62,7 @@ Frank Zago Greg Kroah-Hartman Greg Kroah-Hartman Greg Kroah-Hartman +Gregory CLEMENT Henk Vergonet Henrik Kretzschmar Henrik Rydberg -- cgit v1.2.3 From d06cbe9cbb8905df21b11d1cf789c9b2947688e9 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Fri, 9 Mar 2018 15:27:20 +0900 Subject: net: ethernet: ave: enable Rx drop interrupt This enables AVE_GI_RXDROP interrupt factor. This factor indicates depletion of Rx descriptors and the handler counts the number of dropped packets. Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/sni_ave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 111e7ca9df56..f5c5984afefb 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1295,7 +1295,7 @@ static int ave_open(struct net_device *ndev) val |= AVE_IIRQC_EN0 | (AVE_INTM_COUNT << 16); writel(val, priv->base + AVE_IIRQC); - val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX; + val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX | AVE_GI_RXDROP; ave_irq_restore(ndev, val); napi_enable(&priv->napi_rx); -- cgit v1.2.3 From ab7e34b3431c5d29817c503ad4d3bf2732f92ad3 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 9 Mar 2018 14:50:32 +0800 Subject: vhost_net: initialize rx_ring in vhost_net_open() KMSAN reported a use of uninit memory in vhost_net_buf_unproduce() while trying to access n->vqs[VHOST_NET_VQ_TX].rx_ring: ================================================================== BUG: KMSAN: use of uninitialized memory in vhost_net_buf_unproduce+0x7bb/0x9a0 drivers/vho et.c:170 CPU: 0 PID: 3021 Comm: syz-fuzzer Not tainted 4.16.0-rc4+ #3853 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x1f0 mm/kmsan/kmsan.c:1093 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 vhost_net_buf_unproduce+0x7bb/0x9a0 drivers/vhost/net.c:170 vhost_net_stop_vq drivers/vhost/net.c:974 [inline] vhost_net_stop+0x146/0x380 drivers/vhost/net.c:982 vhost_net_release+0xb1/0x4f0 drivers/vhost/net.c:1015 __fput+0x49f/0xa00 fs/file_table.c:209 ____fput+0x37/0x40 fs/file_table.c:243 task_work_run+0x243/0x2c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:191 [inline] exit_to_usermode_loop arch/x86/entry/common.c:166 [inline] prepare_exit_to_usermode+0x349/0x3b0 arch/x86/entry/common.c:196 syscall_return_slowpath+0xf3/0x6d0 arch/x86/entry/common.c:265 do_syscall_64+0x34d/0x450 arch/x86/entry/common.c:292 ... origin: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:303 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:213 kmsan_kmalloc_large+0x6f/0xd0 mm/kmsan/kmsan.c:392 kmalloc_large_node_hook mm/slub.c:1366 [inline] kmalloc_large_node mm/slub.c:3808 [inline] __kmalloc_node+0x100e/0x1290 mm/slub.c:3818 kmalloc_node include/linux/slab.h:554 [inline] kvmalloc_node+0x1a5/0x2e0 mm/util.c:419 kvmalloc include/linux/mm.h:541 [inline] vhost_net_open+0x64/0x5f0 drivers/vhost/net.c:921 misc_open+0x7b5/0x8b0 drivers/char/misc.c:154 chrdev_open+0xc28/0xd90 fs/char_dev.c:417 do_dentry_open+0xccb/0x1430 fs/open.c:752 vfs_open+0x272/0x2e0 fs/open.c:866 do_last fs/namei.c:3378 [inline] path_openat+0x49ad/0x6580 fs/namei.c:3519 do_filp_open+0x267/0x640 fs/namei.c:3553 do_sys_open+0x6ad/0x9c0 fs/open.c:1059 SYSC_openat+0xc7/0xe0 fs/open.c:1086 SyS_openat+0x63/0x90 fs/open.c:1080 do_syscall_64+0x2f1/0x450 arch/x86/entry/common.c:287 ================================================================== Fixes: c67df11f6e480 ("vhost_net: try batch dequing from skb array") Signed-off-by: Alexander Potapenko Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 610cba276d47..60f1080bffc7 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -948,6 +948,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].done_idx = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; + n->vqs[i].rx_ring = NULL; vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); -- cgit v1.2.3 From 303fd71b37fb710b26f5ff5444029d62cfd627bd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Mar 2018 14:50:33 +0800 Subject: vhost_net: keep private_data and rx_ring synced We get pointer ring from the exported sock, this means we should keep rx_ring and vq->private synced during both vq stop and backend set, otherwise we may see stale rx_ring. Fixes: c67df11f6e480 ("vhost_net: try batch dequing from skb array") Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 60f1080bffc7..efb93063fda1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -973,6 +973,7 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, vhost_net_disable_vq(n, vq); vq->private_data = NULL; vhost_net_buf_unproduce(nvq); + nvq->rx_ring = NULL; mutex_unlock(&vq->mutex); return sock; } @@ -1162,14 +1163,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; vhost_net_buf_unproduce(nvq); - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); if (r) goto err_used; + if (index == VHOST_NET_VQ_RX) + nvq->rx_ring = get_tap_ptr_ring(fd); oldubufs = nvq->ubufs; nvq->ubufs = ubufs; -- cgit v1.2.3 From 3a4030761ea88ff439030ca98e3094b9900e96b7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Mar 2018 14:50:34 +0800 Subject: vhost_net: examine pointer types during un-producing After commit fc72d1d54dd9 ("tuntap: XDP transmission"), we can actually queueing XDP pointers in the pointer ring, so we should examine the pointer type before freeing the pointer. Fixes: fc72d1d54dd9 ("tuntap: XDP transmission") Reported-by: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 ++- drivers/vhost/net.c | 2 +- include/linux/if_tun.h | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7433bb2e4451..28cfa642e39a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -655,7 +655,7 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) return tun; } -static void tun_ptr_free(void *ptr) +void tun_ptr_free(void *ptr) { if (!ptr) return; @@ -667,6 +667,7 @@ static void tun_ptr_free(void *ptr) __skb_array_destroy_skb(ptr); } } +EXPORT_SYMBOL_GPL(tun_ptr_free); static void tun_queue_purge(struct tun_file *tfile) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index efb93063fda1..8139bc70ad7d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -170,7 +170,7 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, vhost_net_buf_get_size(rxq), - __skb_array_destroy_skb); + tun_ptr_free); rxq->head = rxq->tail = 0; } } diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index c5b0a75a7812..fd00170b494f 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file); bool tun_is_xdp_buff(void *ptr); void *tun_xdp_to_ptr(void *ptr); void *tun_ptr_to_xdp(void *ptr); +void tun_ptr_free(void *ptr); #else #include #include @@ -50,5 +51,8 @@ static inline void *tun_ptr_to_xdp(void *ptr) { return NULL; } +static inline void tun_ptr_free(void *ptr) +{ +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ -- cgit v1.2.3 From d56e57ca030c8b4296944a2ae61ac167bf979c07 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 8 Mar 2018 17:17:16 +0200 Subject: usb: host: xhci-plat: revert "usb: host: xhci-plat: enable clk in resume timing" This patch reverts the commit 835e4241e714 ("usb: host: xhci-plat: enable clk in resume timing") because this driver also has runtime PM and the commit 560869100b99 ("clk: renesas: cpg-mssr: Restore module clocks during resume") will restore the clock on R-Car H3 environment. If the xhci_plat_suspend() disables the clk, the system cannot enable the clk in resume like the following behavior: < In resume > - genpd_resume_noirq() runs and enable the clk (enable_count = 1) - cpg_mssr_resume_noirq() restores the clk register. -- Since the clk was disabled in suspend, cpg_mssr_resume_noirq() will disable the clk and keep the enable_count. - Even if xhci_plat_resume() calls clk_prepare_enable(), since the enable_count is 1, the clk will be not enabled. After this patch is applied, the cpg-mssr driver will save the clk as enable, so the clk will be enabled in resume. Fixes: 835e4241e714 ("usb: host: xhci-plat: enable clk in resume timing") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6f038306c14d..6652e2d5bd2e 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -360,7 +360,6 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - int ret; /* * xhci_suspend() needs `do_wakeup` to know whether host is allowed @@ -370,12 +369,7 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) * reconsider this when xhci_plat_suspend enlarges its scope, e.g., * also applies to runtime suspend. */ - ret = xhci_suspend(xhci, device_may_wakeup(dev)); - - if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk)) - clk_disable_unprepare(xhci->clk); - - return ret; + return xhci_suspend(xhci, device_may_wakeup(dev)); } static int __maybe_unused xhci_plat_resume(struct device *dev) @@ -384,9 +378,6 @@ static int __maybe_unused xhci_plat_resume(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; - if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk)) - clk_prepare_enable(xhci->clk); - ret = xhci_priv_resume_quirk(hcd); if (ret) return ret; -- cgit v1.2.3 From 191edc5e2e515aab1075a3f0ef23599e80be5f59 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 8 Mar 2018 17:17:17 +0200 Subject: xhci: Fix front USB ports on ASUS PRIME B350M-A When a USB device gets plugged on ASUS PRIME B350M-A's front ports, the xHC stops working: [ 549.114587] xhci_hcd 0000:02:00.0: WARN: xHC CMD_RUN timeout [ 549.114608] suspend_common(): xhci_pci_suspend+0x0/0xc0 returns -110 [ 549.114638] xhci_hcd 0000:02:00.0: can't suspend (hcd_pci_runtime_suspend returned -110) Delay before running xHC command CMD_RUN can workaround the issue. Use a new quirk to make the delay only targets to the affected xHC. Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 +++ drivers/usb/host/xhci.c | 3 +++ drivers/usb/host/xhci.h | 1 + 3 files changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5262fa571a5d..d9f831b67e57 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -126,6 +126,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) xhci->quirks |= XHCI_AMD_PLL_FIX; + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb) + xhci->quirks |= XHCI_SUSPEND_DELAY; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 25d4b748a56f..5d37700ae4b0 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -877,6 +877,9 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); del_timer_sync(&xhci->shared_hcd->rh_timer); + if (xhci->quirks & XHCI_SUSPEND_DELAY) + usleep_range(1000, 1500); + spin_lock_irq(&xhci->lock); clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); clear_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index d20e57b35d32..866e141d4972 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1826,6 +1826,7 @@ struct xhci_hcd { #define XHCI_U2_DISABLE_WAKE (1 << 27) #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) #define XHCI_HW_LPM_DISABLE (1 << 29) +#define XHCI_SUSPEND_DELAY (1 << 30) unsigned int num_active_eps; unsigned int limit_active_eps; -- cgit v1.2.3 From df3334c223a033f562645712e832ca4cbb326bbf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Feb 2018 17:39:17 +0000 Subject: usbip: vudc: fix null pointer dereference on udc->lock Currently the driver attempts to spin lock on udc->lock before a NULL pointer check is performed on udc, hence there is a potential null pointer dereference on udc->lock. Fix this by moving the null check on udc before the lock occurs. Fixes: ea6873a45a22 ("usbip: vudc: Add SysFS infrastructure for VUDC") Signed-off-by: Colin Ian King Acked-by: Shuah Khan Reviewed-by: Krzysztof Opasiak Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index d86f72bbbb91..6dcd3ff655c3 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -105,10 +105,14 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a if (rv != 0) return -EINVAL; + if (!udc) { + dev_err(dev, "no device"); + return -ENODEV; + } spin_lock_irqsave(&udc->lock, flags); /* Don't export what we don't have */ - if (!udc || !udc->driver || !udc->pullup) { - dev_err(dev, "no device or gadget not bound"); + if (!udc->driver || !udc->pullup) { + dev_err(dev, "gadget not bound"); ret = -ENODEV; goto unlock; } -- cgit v1.2.3 From 49bae2f3093b0a7bc5e1a158d89697a73cdb0243 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 9 Mar 2018 15:33:52 +0200 Subject: mlxsw: spectrum: Fix gact_ok offloading For ok GACT action, TERMINATE binding_cmd should be used in action set passed down to HW. Fixes: b2925957ec1a9 ("mlxsw: spectrum_flower: Offload "ok" termination action") Signed-off-by: Jiri Pirko Reported-by: Alexander Petrovskiy Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 2 +- 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index b698fb481b2e..996dc099cd58 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -443,6 +443,17 @@ int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) } EXPORT_SYMBOL(mlxsw_afa_block_jump); +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block) +{ + if (block->finished) + return -EINVAL; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0); + block->finished = true; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_terminate); + static struct mlxsw_afa_fwd_entry * mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 43132293475c..b91f2b0829b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -65,6 +65,7 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4ec1ca3c96c8..386861773476 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -553,6 +553,7 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 0897a5435cc2..92d90ed7207e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -528,6 +528,11 @@ int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, return mlxsw_afa_block_jump(rulei->act_block, group_id); } +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_terminate(rulei->act_block); +} + int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) { return mlxsw_afa_block_append_drop(rulei->act_block); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 6ce00e28d4ea..89dbf569dff5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -65,7 +65,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { if (is_tcf_gact_ok(a)) { - err = mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) return err; } else if (is_tcf_gact_shot(a)) { -- cgit v1.2.3 From 663f1b26f9c129aa2912c1a1d3359e3ecd88e814 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Mar 2018 15:33:53 +0200 Subject: mlxsw: spectrum: Prevent duplicate mirrors The Spectrum ASIC doesn't support mirroring more than once from a single binding point (which is a port-direction pair). Therefore detect that a second binding of a given binding point is attempted. To that end, extend struct mlxsw_sp_span_inspected_port to track whether a given binding point is bound or not. Extend mlxsw_sp_span_entry_port_find() to look for ports based on the full unique key: port number, direction, and boundness. Besides fixing the overt bug where configured mirrors are not offloaded, this also fixes a more subtle bug: mlxsw_sp_span_inspected_port_del() just defers to mlxsw_sp_span_entry_bound_port_find(), and that used to find the first port with the right number (disregarding the type). Thus by adding and removing egress and ingress mirrors in the right order, one could trick the system into believing it has no egress mirrors when in fact it did have some. That then caused that mlxsw_sp_span_port_mtu_update() didn't update mirroring buffer when MTU was changed. Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 28 ++++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c7e941aecc2a..bf400c75fcc8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -655,13 +655,17 @@ static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) } static struct mlxsw_sp_span_inspected_port * -mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry) +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + struct mlxsw_sp_port *port, + bool bind) { struct mlxsw_sp_span_inspected_port *p; list_for_each_entry(p, &span_entry->bound_ports_list, list) - if (port->local_port == p->local_port) + if (type == p->type && + port->local_port == p->local_port && + bind == p->bound) return p; return NULL; } @@ -691,8 +695,22 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, struct mlxsw_sp_span_inspected_port *inspected_port; struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; char sbib_pl[MLXSW_REG_SBIB_LEN]; + int i; int err; + /* A given (source port, direction) can only be bound to one analyzer, + * so if a binding is requested, check for conflicts. + */ + if (bind) + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = + &mlxsw_sp->span.entries[i]; + + if (mlxsw_sp_span_entry_bound_port_find(curr, type, + port, bind)) + return -EEXIST; + } + /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, @@ -720,6 +738,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, } inspected_port->local_port = port->local_port; inspected_port->type = type; + inspected_port->bound = bind; list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); return 0; @@ -746,7 +765,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; char sbib_pl[MLXSW_REG_SBIB_LEN]; - inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type, + port, bind); if (!inspected_port) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 386861773476..92064db2ae44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -120,6 +120,9 @@ struct mlxsw_sp_span_inspected_port { struct list_head list; enum mlxsw_sp_span_type type; u8 local_port; + + /* Whether this is a directly bound mirror (port-to-port) or an ACL. */ + bool bound; }; struct mlxsw_sp_span_entry { -- cgit v1.2.3 From 3b04caab81649a9e8d5375b919b6653d791951df Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:40 -0800 Subject: ip6gre: add erspan v2 to tunnel lookup The patch adds the erspan v2 proto in ip6gre_tunnel_lookup so the erspan v2 tunnel can be found correctly. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c353125546d..83c2fffd40be 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, struct ip6_tnl *t, *cand = NULL; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB) || - gre_proto == htons(ETH_P_ERSPAN)) ? + gre_proto == htons(ETH_P_ERSPAN) || + gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; -- cgit v1.2.3 From d6aa71197ffcb68850bfebfc3fc160abe41df53b Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:41 -0800 Subject: ip6erspan: improve error handling for erspan version number. When users fill in incorrect erspan version number through the struct erspan_metadata uapi, current code skips pushing the erspan header but continue pushing the gre header, which is incorrect. The patch fixes it by returning error. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 83c2fffd40be..a299f5424e16 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -945,6 +945,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, md->u.md2.dir, get_hwid(&md->u.md2), truncate, false); + } else { + goto tx_err; } } else { switch (skb->protocol) { -- cgit v1.2.3 From e41c7c68ea771683cae5a7f81c268f38d7912ecb Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:42 -0800 Subject: ip6erspan: make sure enough headroom at xmit. The patch adds skb_cow_header() to ensure enough headroom at ip6erspan_tunnel_xmit before pushing the erspan header to the skb. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a299f5424e16..1bbd0930063e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -903,6 +903,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } + if (skb_cow_head(skb, dev->needed_headroom)) + goto tx_err; + t->parms.o_flags &= ~TUNNEL_KEY; IPCB(skb)->flags = 0; -- cgit v1.2.3 From d6c931ea32dc08ac2665bb5f009f9c40ad1bbdb3 Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Fri, 9 Mar 2018 18:34:34 +0100 Subject: USB: OHCI: Fix NULL dereference in HCDs using HCD_LOCAL_MEM Scatter-gather needs to be disabled when using dma_declare_coherent_memory and HCD_LOCAL_MEM. Andrea Righi made the equivalent fix for EHCI drivers in commit 4307a28eb01284 "USB: EHCI: fix NULL pointer dererence in HCDs that use HCD_LOCAL_MEM". The following NULL pointer WARN_ON_ONCE triggered with OHCI drivers: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 49 at drivers/usb/core/hcd.c:1379 hcd_alloc_coherent+0x4c/0xc8 Modules linked in: CPU: 0 PID: 49 Comm: usb-storage Not tainted 4.15.0+ #1014 Stack : 00000000 00000000 805a78d2 0000003a 81f5c2cc 8053d367 804d77fc 00000031 805a3a08 00000563 81ee9400 805a0000 00000000 10058c00 81f61b10 805c0000 00000000 00000000 805a0000 00d9038e 00000004 803ee818 00000006 312e3420 805c0000 00000000 00000073 81f61958 00000000 00000000 802eb380 804fd538 00000009 00000563 81ee9400 805a0000 00000002 80056148 00000000 805a0000 ... Call Trace: [<578af360>] show_stack+0x74/0x104 [<2f3702c6>] __warn+0x118/0x120 [] warn_slowpath_null+0x44/0x58 [] hcd_alloc_coherent+0x4c/0xc8 [<3578fa36>] usb_hcd_map_urb_for_dma+0x4d8/0x534 [<110bc94c>] usb_hcd_submit_urb+0x82c/0x834 [<02eb5baf>] usb_sg_wait+0x14c/0x1a0 [] usb_stor_bulk_transfer_sglist.part.1+0xac/0x124 [<87a5c34c>] usb_stor_bulk_srb+0x40/0x60 [] usb_stor_Bulk_transport+0x160/0x37c [] usb_stor_invoke_transport+0x3c/0x500 [<004754f4>] usb_stor_control_thread+0x258/0x28c [<22edf42e>] kthread+0x134/0x13c [] ret_from_kernel_thread+0x14/0x1c ---[ end trace bcdb825805eefdcc ]--- Signed-off-by: Fredrik Noring Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 84f88fa411cd..d088c340e4d0 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -447,7 +447,8 @@ static int ohci_init (struct ohci_hcd *ohci) struct usb_hcd *hcd = ohci_to_hcd(ohci); /* Accept arbitrarily long scatter-gather lists */ - hcd->self.sg_tablesize = ~0; + if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + hcd->self.sg_tablesize = ~0; if (distrust_firmware) ohci->flags |= OHCI_QUIRK_HUB_POWER; -- cgit v1.2.3 From 7832f6d12fa25cd4cfc18eaae67eb5e2dbaa2c34 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 25 Feb 2018 16:20:01 +0100 Subject: usb: typec: tcpm: fusb302: Do not log an error on -EPROBE_DEFER Do not log an error if tcpm_register_port() fails with -EPROBE_DEFER. Fixes: cf140a356971 ("typec: fusb302: Use dev_err during probe") Signed-off-by: Hans de Goede Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/fusb302/fusb302.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/fusb302/fusb302.c b/drivers/usb/typec/fusb302/fusb302.c index 9ce4756adad6..dcd8ef085b30 100644 --- a/drivers/usb/typec/fusb302/fusb302.c +++ b/drivers/usb/typec/fusb302/fusb302.c @@ -1857,7 +1857,8 @@ static int fusb302_probe(struct i2c_client *client, chip->tcpm_port = tcpm_register_port(&client->dev, &chip->tcpc_dev); if (IS_ERR(chip->tcpm_port)) { ret = PTR_ERR(chip->tcpm_port); - dev_err(dev, "cannot register tcpm port, ret=%d", ret); + if (ret != -EPROBE_DEFER) + dev_err(dev, "cannot register tcpm port, ret=%d", ret); goto destroy_workqueue; } -- cgit v1.2.3 From 3c3e4b3a708a9d6451052e348981f37d2b3e92b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Mar 2018 12:31:53 +0300 Subject: iio: adc: meson-saradc: unlock on error in meson_sar_adc_lock() The meson_sar_adc_lock() function is not supposed to hold the "indio_dev->mlock" on the error path. Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Signed-off-by: Dan Carpenter Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 29fa7736d80c..ede955d9b2a4 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -462,8 +462,10 @@ static int meson_sar_adc_lock(struct iio_dev *indio_dev) regmap_read(priv->regmap, MESON_SAR_ADC_DELAY, &val); } while (val & MESON_SAR_ADC_DELAY_BL30_BUSY && timeout--); - if (timeout < 0) + if (timeout < 0) { + mutex_unlock(&indio_dev->mlock); return -ETIMEDOUT; + } } return 0; -- cgit v1.2.3 From cc4e0036311fbb0b2cab7dc8f142f84ebd8b388b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 10 Mar 2018 16:21:32 +0000 Subject: Revert "iio: accel: st_accel: remove redundant pointer pdata" This reverts commit 585ed27d06151f98e39238298f43ee261314ae74. This removed code which was unused due to a bug in commit 7383d44b. To fix this bug the code is needed. Thus this revert. Signed-off-by: Michael Nosthoff Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 6fe995cf16a6..460aa58e0159 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -920,6 +920,8 @@ static const struct iio_trigger_ops st_accel_trigger_ops = { int st_accel_common_probe(struct iio_dev *indio_dev) { struct st_sensor_data *adata = iio_priv(indio_dev); + struct st_sensors_platform_data *pdata = + (struct st_sensors_platform_data *)adata->dev->platform_data; int irq = adata->get_irq_data_ready(indio_dev); int err; @@ -946,6 +948,9 @@ int st_accel_common_probe(struct iio_dev *indio_dev) &adata->sensor_settings->fs.fs_avl[0]; adata->odr = adata->sensor_settings->odr.odr_avl[0].hz; + if (!pdata) + pdata = (struct st_sensors_platform_data *)&default_accel_pdata; + err = st_sensors_init_sensor(indio_dev, adata->dev->platform_data); if (err < 0) goto st_accel_power_off; -- cgit v1.2.3 From 8b438686a001db64c21782d04ef68111e53c45d9 Mon Sep 17 00:00:00 2001 From: Michael Nosthoff Date: Fri, 9 Mar 2018 10:02:45 +0100 Subject: iio: st_pressure: st_accel: pass correct platform data to init Commit 7383d44b added a pointer pdata which get set to the default platform_data when non was defined in the device. But it did not pass this pointer to the st_sensors_init_sensor call but still used the maybe uninitialized platform_data from dev. This breaks initialization when no platform_data is given and the optional st,drdy-int-pin devicetree option is not set. This commit fixes this. Cc: stable@vger.kernel.org Fixes: 7383d44b ("iio: st_pressure: st_accel: Initialise sensor platform data properly") Signed-off-by: Michael Nosthoff Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 2 +- drivers/iio/pressure/st_pressure_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 460aa58e0159..3e6fd5a8ac5b 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -951,7 +951,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev) if (!pdata) pdata = (struct st_sensors_platform_data *)&default_accel_pdata; - err = st_sensors_init_sensor(indio_dev, adata->dev->platform_data); + err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) goto st_accel_power_off; diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 349e5c713c03..4ddb6cf7d401 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -640,7 +640,7 @@ int st_press_common_probe(struct iio_dev *indio_dev) press_data->sensor_settings->drdy_irq.int2.addr)) pdata = (struct st_sensors_platform_data *)&default_press_pdata; - err = st_sensors_init_sensor(indio_dev, press_data->dev->platform_data); + err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) goto st_press_power_off; -- cgit v1.2.3 From d0f833065221cbfcbadf19fd4102bcfa9330006a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Mar 2018 21:58:28 +0100 Subject: ALSA: seq: Fix possible UAF in snd_seq_check_queue() Although we've covered the races between concurrent write() and ioctl() in the previous patch series, there is still a possible UAF in the following scenario: A: user client closed B: timer irq -> snd_seq_release() -> snd_seq_timer_interrupt() -> snd_seq_free_client() -> snd_seq_check_queue() -> cell = snd_seq_prioq_cell_peek() -> snd_seq_prioq_leave() .... removing all cells -> snd_seq_pool_done() .... vfree() -> snd_seq_compare_tick_time(cell) ... Oops So the problem is that a cell is peeked and accessed without any protection until it's retrieved from the queue again via snd_seq_prioq_cell_out(). This patch tries to address it, also cleans up the code by a slight refactoring. snd_seq_prioq_cell_out() now receives an extra pointer argument. When it's non-NULL, the function checks the event timestamp with the given pointer. The caller needs to pass the right reference either to snd_seq_tick or snd_seq_realtime depending on the event timestamp type. A good news is that the above change allows us to remove the snd_seq_prioq_cell_peek(), too, thus the patch actually reduces the code size. Reviewed-by: Nicolai Stange Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_prioq.c | 28 ++++++++++++++-------------- sound/core/seq/seq_prioq.h | 6 ++---- sound/core/seq/seq_queue.c | 28 +++++++++------------------- 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/sound/core/seq/seq_prioq.c b/sound/core/seq/seq_prioq.c index bc1c8488fc2a..2bc6759e4adc 100644 --- a/sound/core/seq/seq_prioq.c +++ b/sound/core/seq/seq_prioq.c @@ -87,7 +87,7 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo) if (f->cells > 0) { /* drain prioQ */ while (f->cells > 0) - snd_seq_cell_free(snd_seq_prioq_cell_out(f)); + snd_seq_cell_free(snd_seq_prioq_cell_out(f, NULL)); } kfree(f); @@ -214,8 +214,18 @@ int snd_seq_prioq_cell_in(struct snd_seq_prioq * f, return 0; } +/* return 1 if the current time >= event timestamp */ +static int event_is_ready(struct snd_seq_event *ev, void *current_time) +{ + if ((ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK) + return snd_seq_compare_tick_time(current_time, &ev->time.tick); + else + return snd_seq_compare_real_time(current_time, &ev->time.time); +} + /* dequeue cell from prioq */ -struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f) +struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f, + void *current_time) { struct snd_seq_event_cell *cell; unsigned long flags; @@ -227,6 +237,8 @@ struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f) spin_lock_irqsave(&f->lock, flags); cell = f->head; + if (cell && current_time && !event_is_ready(&cell->event, current_time)) + cell = NULL; if (cell) { f->head = cell->next; @@ -252,18 +264,6 @@ int snd_seq_prioq_avail(struct snd_seq_prioq * f) return f->cells; } - -/* peek at cell at the head of the prioq */ -struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq * f) -{ - if (f == NULL) { - pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n"); - return NULL; - } - return f->head; -} - - static inline int prioq_match(struct snd_seq_event_cell *cell, int client, int timestamp) { diff --git a/sound/core/seq/seq_prioq.h b/sound/core/seq/seq_prioq.h index d38bb78d9345..2c315ca10fc4 100644 --- a/sound/core/seq/seq_prioq.h +++ b/sound/core/seq/seq_prioq.h @@ -44,14 +44,12 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo); int snd_seq_prioq_cell_in(struct snd_seq_prioq *f, struct snd_seq_event_cell *cell); /* dequeue cell from prioq */ -struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f); +struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f, + void *current_time); /* return number of events available in prioq */ int snd_seq_prioq_avail(struct snd_seq_prioq *f); -/* peek at cell at the head of the prioq */ -struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq *f); - /* client left queue */ void snd_seq_prioq_leave(struct snd_seq_prioq *f, int client, int timestamp); diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 0428e9061b47..b377f5048352 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -277,30 +277,20 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... */ - while ((cell = snd_seq_prioq_cell_peek(q->tickq)) != NULL) { - if (snd_seq_compare_tick_time(&q->timer->tick.cur_tick, - &cell->event.time.tick)) { - cell = snd_seq_prioq_cell_out(q->tickq); - if (cell) - snd_seq_dispatch_event(cell, atomic, hop); - } else { - /* event remains in the queue */ + for (;;) { + cell = snd_seq_prioq_cell_out(q->tickq, + &q->timer->tick.cur_tick); + if (!cell) break; - } + snd_seq_dispatch_event(cell, atomic, hop); } - /* Process time queue... */ - while ((cell = snd_seq_prioq_cell_peek(q->timeq)) != NULL) { - if (snd_seq_compare_real_time(&q->timer->cur_time, - &cell->event.time.time)) { - cell = snd_seq_prioq_cell_out(q->timeq); - if (cell) - snd_seq_dispatch_event(cell, atomic, hop); - } else { - /* event remains in the queue */ + for (;;) { + cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + if (!cell) break; - } + snd_seq_dispatch_event(cell, atomic, hop); } /* free lock */ -- cgit v1.2.3 From a2ff19f7b70118ced291a28d5313469914de451b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Mar 2018 22:23:31 +0100 Subject: ALSA: seq: Clear client entry before deleting else at closing When releasing a client, we need to clear the clienttab[] entry at first, then call snd_seq_queue_client_leave(). Otherwise, the in-flight cell in the queue might be picked up by the timer interrupt via snd_seq_check_queue() before calling snd_seq_queue_client_leave(), and it's delivered to another queue while the client is clearing queues. This may eventually result in an uncleared cell remaining in a queue, and the later snd_seq_pool_delete() may need to wait for a long time until the event gets really processed. By moving the clienttab[] clearance at the beginning of release, any event delivery of a cell belonging to this client will fail at a later point, since snd_seq_client_ptr() returns NULL. Thus the cell that was picked up by the timer interrupt will be returned immediately without further delivery, and the long stall of snd_seq_delete_pool() can be avoided, too. Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 918338dea5b9..61a07fe34cd2 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -255,12 +255,12 @@ static int seq_free_client1(struct snd_seq_client *client) if (!client) return 0; - snd_seq_delete_all_ports(client); - snd_seq_queue_client_leave(client->number); spin_lock_irqsave(&clients_lock, flags); clienttablock[client->number] = 1; clienttab[client->number] = NULL; spin_unlock_irqrestore(&clients_lock, flags); + snd_seq_delete_all_ports(client); + snd_seq_queue_client_leave(client->number); snd_use_lock_sync(&client->use_lock); snd_seq_queue_client_termination(client->number); if (client->pool) -- cgit v1.2.3 From 01c0b4265cc16bc1f43f475c5944c55c10d5768f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 10 Mar 2018 23:04:23 +0100 Subject: ALSA: pcm: Fix UAF in snd_pcm_oss_get_formats() snd_pcm_oss_get_formats() has an obvious use-after-free around snd_mask_test() calls, as spotted by syzbot. The passed format_mask argument is a pointer to the hw_params object that is freed before the loop. What a surprise that it has been present since the original code of decades ago... Reported-by: syzbot+4090700a4f13fccaf648@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index b044c0a5a674..02298c9c6020 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1762,10 +1762,9 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) return -ENOMEM; _snd_pcm_hw_params_any(params); err = snd_pcm_hw_refine(substream, params); - format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); - kfree(params); if (err < 0) - return err; + goto error; + format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); for (fmt = 0; fmt < 32; ++fmt) { if (snd_mask_test(format_mask, fmt)) { int f = snd_pcm_oss_format_to(fmt); @@ -1773,7 +1772,10 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) formats |= f; } } - return formats; + + error: + kfree(params); + return err < 0 ? err : formats; } static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) -- cgit v1.2.3 From 4f2c7583e33eb08dc09dd2e25574b80175ba7d93 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 15:51:32 +0000 Subject: irqchip/gic-v3-its: Ensure nr_ites >= nr_lpis When struct its_device instances are created, the nr_ites member will be set to a power of 2 that equals or exceeds the requested number of MSIs passed to the msi_prepare() callback. At the same time, the LPI map is allocated to be some multiple of 32 in size, where the allocated size may be less than the requested size depending on whether a contiguous range of sufficient size is available in the global LPI bitmap. This may result in the situation where the nr_ites < nr_lpis, and since nr_ites is what we program into the hardware when we map the device, the additional LPIs will be non-functional. For bog standard hardware, this does not really matter. However, in cases where ITS device IDs are shared between different PCIe devices, we may end up allocating these additional LPIs without taking into account that they don't actually work. So let's make nr_ites at least 32. This ensures that all allocated LPIs are 'live', and that its_alloc_device_irq() will fail when attempts are made to allocate MSIs beyond what was allocated in the first place. Signed-off-by: Ard Biesheuvel [maz: updated comment] Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 94b7d74d519f..2cbb19cddbf8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1412,7 +1412,7 @@ static struct irq_chip its_irq_chip = { * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. */ #define IRQS_PER_CHUNK_SHIFT 5 -#define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT) +#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) #define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ static unsigned long *lpi_bitmap; @@ -2119,11 +2119,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, dev = kzalloc(sizeof(*dev), GFP_KERNEL); /* - * At least one bit of EventID is being used, hence a minimum - * of two entries. No, the architecture doesn't let you - * express an ITT with a single entry. + * We allocate at least one chunk worth of LPIs bet device, + * and thus that many ITEs. The device may require less though. */ - nr_ites = max(2UL, roundup_pow_of_two(nvecs)); + nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs)); sz = nr_ites * its->ite_size; sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); -- cgit v1.2.3 From 61b8b22858b9cfa5df52fc0e4893aebfe0bbb804 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 12 Feb 2018 11:12:56 -0200 Subject: irqchip/irq-imx-gpcv2: Remove unused function imx_gpcv2_get_wakeup_source() is not used anywhere, so remove it. This fixes the following sparse warning: drivers/irqchip/irq-imx-gpcv2.c:34:5: warning: symbol 'imx_gpcv2_get_wakeup_source' was not declared. Should it be static? Fixes: e324c4dc4a59 ("irqchip/imx-gpcv2: IMX GPCv2 driver for wakeup sources") Signed-off-by: Fabio Estevam Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-imx-gpcv2.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 675eda5ff2b8..4760307ab43f 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -28,20 +28,6 @@ struct gpcv2_irqchip_data { static struct gpcv2_irqchip_data *imx_gpcv2_instance; -/* - * Interface for the low level wakeup code. - */ -u32 imx_gpcv2_get_wakeup_source(u32 **sources) -{ - if (!imx_gpcv2_instance) - return 0; - - if (sources) - *sources = imx_gpcv2_instance->wakeup_sources; - - return IMR_NUM; -} - static int gpcv2_wakeup_source_save(void) { struct gpcv2_irqchip_data *cd; -- cgit v1.2.3 From 932909d9b28d27e807ff8eecb68c7748f6701628 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Mar 2018 12:54:19 +0100 Subject: netfilter: ebtables: fix erroneous reject of last rule The last rule in the blob has next_entry offset that is same as total size. This made "ebtables32 -A OUTPUT -d de:ad:be:ef:01:02" fail on 64 bit kernel. Fixes: b71812168571fa ("netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 254ef9f49567..a94d23b0a9af 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2119,8 +2119,12 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, * offsets are relative to beginning of struct ebt_entry (i.e., 0). */ for (i = 0; i < 4 ; ++i) { - if (offsets[i] >= *total) + if (offsets[i] > *total) return -EINVAL; + + if (i < 3 && offsets[i] == *total) + return -EINVAL; + if (i == 0) continue; if (offsets[i-1] > offsets[i]) -- cgit v1.2.3 From b1d0a5d0cba4597c0394997b2d5fced3e3841b4e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 10 Mar 2018 01:15:45 +0100 Subject: netfilter: x_tables: add and use xt_check_proc_name recent and hashlimit both create /proc files, but only check that name is 0 terminated. This can trigger WARN() from procfs when name is "" or "/". Add helper for this and then use it for both. Cc: Eric Dumazet Reported-by: Eric Dumazet Reported-by: Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 ++ net/netfilter/x_tables.c | 30 ++++++++++++++++++++++++++++++ net/netfilter/xt_hashlimit.c | 16 ++++++++++------ net/netfilter/xt_recent.c | 6 +++--- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1313b35c3ab7..14529511c4b8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); +int xt_check_proc_name(const char *name, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fa1655aff8d3..4aa01c90e9d1 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -423,6 +423,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto) return buf; } +/** + * xt_check_proc_name - check that name is suitable for /proc file creation + * + * @name: file name candidate + * @size: length of buffer + * + * some x_tables modules wish to create a file in /proc. + * This function makes sure that the name is suitable for this + * purpose, it checks that name is NUL terminated and isn't a 'special' + * name, like "..". + * + * returns negative number on error or 0 if name is useable. + */ +int xt_check_proc_name(const char *name, unsigned int size) +{ + if (name[0] == '\0') + return -EINVAL; + + if (strnlen(name, size) == size) + return -ENAMETOOLONG; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + strchr(name, '/')) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_proc_name); + int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 66f5aca62a08..3360f13dc208 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -917,8 +917,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); @@ -935,8 +936,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); @@ -950,9 +952,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo3 *info = par->matchinfo; + int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, info->name, 3); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 6d232d18faff..81ee1d6543b2 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par, info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } - if (info->name[0] == '\0' || - strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; -- cgit v1.2.3 From c8d70a700a5b486bfa8e5a7d33d805389f6e59f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Mar 2018 14:27:31 +0100 Subject: netfilter: bridge: ebt_among: add more missing match size checks ebt_among is special, it has a dynamic match size and is exempt from the central size checks. commit c4585a2823edf ("bridge: ebt_among: add missing match size checks") added validation for pool size, but missed fact that the macros ebt_among_wh_src/dst can already return out-of-bound result because they do not check value of wh_src/dst_ofs (an offset) vs. the size of the match that userspace gave to us. v2: check that offset has correct alignment. Paolo Abeni points out that we should also check that src/dst wormhash arrays do not overlap, and src + length lines up with start of dst (or vice versa). v3: compact wormhash_sizes_valid() part NB: Fixes tag is intentionally wrong, this bug exists from day one when match was added for 2.6 kernel. Tag is there so stable maintainers will notice this one too. Tested with same rules from the earlier patch. Fixes: c4585a2823edf ("bridge: ebt_among: add missing match size checks") Reported-by: Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index c5afb4232ecb..620e54f08296 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -177,6 +177,28 @@ static bool poolsize_invalid(const struct ebt_mac_wormhash *w) return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple)); } +static bool wormhash_offset_invalid(int off, unsigned int len) +{ + if (off == 0) /* not present */ + return false; + + if (off < (int)sizeof(struct ebt_among_info) || + off % __alignof__(struct ebt_mac_wormhash)) + return true; + + off += sizeof(struct ebt_mac_wormhash); + + return off > len; +} + +static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b) +{ + if (a == 0) + a = sizeof(struct ebt_among_info); + + return ebt_mac_wormhash_size(wh) + a == b; +} + static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; @@ -189,6 +211,10 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) if (expected_length > em->match_size) return -EINVAL; + if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) || + wormhash_offset_invalid(info->wh_src_ofs, em->match_size)) + return -EINVAL; + wh_dst = ebt_among_wh_dst(info); if (poolsize_invalid(wh_dst)) return -EINVAL; @@ -201,6 +227,14 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) if (poolsize_invalid(wh_src)) return -EINVAL; + if (info->wh_src_ofs < info->wh_dst_ofs) { + if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs)) + return -EINVAL; + } else { + if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs)) + return -EINVAL; + } + expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { -- cgit v1.2.3 From c04a3f730021c304c7cc4bc30ee57ee70ad98d57 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Mar 2018 18:56:26 +0100 Subject: netfilter: nf_tables: release flowtable hooks Otherwise we leak this array. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 558593e6a0a3..c4acc7340eb1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5423,6 +5423,7 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->ops); kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); rhashtable_destroy(&flowtable->data.rhashtable); -- cgit v1.2.3 From 7f95beea36089918335eb1810ddd7ba8cf9d09cc Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 8 Mar 2018 14:49:41 +0800 Subject: clk: update cached phase to respect the fact when setting phase It's found that the final phase set by driver doesn't match that of the output from clk_summary: dwmmc_rockchip fe310000.dwmmc: Successfully tuned phase to 346 mmc0: new ultra high speed SDR104 SDIO card at address 0001 cat /sys/kernel/debug/clk/clk_summary | grep sdio_sample sdio_sample 0 1 0 50000000 0 0 It seems the cached core->phase isn't updated after the clk was registered. So fix this issue by updating the core->phase if setting phase successfully. Fixes: 9e4d04adeb1a ("clk: add clk_core_set_phase_nolock function") Cc: Stable Cc: Jerome Brunet Signed-off-by: Shawn Lin Reviewed-by: Jerome Brunet Tested-by: Jerome Brunet Signed-off-by: Michael Turquette --- drivers/clk/clk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0f686a9dac3e..617e56268b18 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2309,8 +2309,11 @@ static int clk_core_set_phase_nolock(struct clk_core *core, int degrees) trace_clk_set_phase(core, degrees); - if (core->ops->set_phase) + if (core->ops->set_phase) { ret = core->ops->set_phase(core->hw, degrees); + if (!ret) + core->phase = degrees; + } trace_clk_set_phase_complete(core, degrees); -- cgit v1.2.3 From 13fbcc8dc573482dd3f27568257fd7087f8935f4 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 8 Mar 2018 16:17:23 -0800 Subject: macvlan: filter out unsupported feature flags Adding a macvlan device on top of a lowerdev that supports the xfrm offloads fails with a new regression: # ip link add link ens1f0 mv0 type macvlan RTNETLINK answers: Operation not permitted Tracing down the failure shows that the macvlan device inherits the NETIF_F_HW_ESP and NETIF_F_HW_ESP_TX_CSUM feature flags from the lowerdev, but with no dev->xfrmdev_ops API filled in, it doesn't actually support xfrm. When the request is made to add the new macvlan device, the XFRM listener for NETDEV_REGISTER calls xfrm_api_check() which fails the new registration because dev->xfrmdev_ops is NULL. The macvlan creation succeeds when we filter out the ESP feature flags in macvlan_fix_features(), so let's filter them out like we're already filtering out ~NETIF_F_NETNS_LOCAL. When XFRM support is added in the future, we can add the flags into MACVLAN_FEATURES. This same problem could crop up in the future with any other new feature flags, so let's filter out any flags that aren't defined as supported in macvlan. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Alexey Kodanev Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8fc02d9db3d0..725f4b4afc6d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1036,7 +1036,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, lowerdev_features &= (features | ~NETIF_F_LRO); features = netdev_increment_features(lowerdev_features, features, mask); features |= ALWAYS_ON_FEATURES; - features &= ~NETIF_F_NETNS_LOCAL; + features &= (ALWAYS_ON_FEATURES | MACVLAN_FEATURES); return features; } -- cgit v1.2.3 From ddc502dfed600bff0b61d899f70d95b76223fdfc Mon Sep 17 00:00:00 2001 From: zhangliping Date: Fri, 9 Mar 2018 10:08:50 +0800 Subject: openvswitch: meter: fix the incorrect calculation of max delta_t Max delat_t should be the full_bucket/rate instead of the full_bucket. Also report EINVAL if the rate is zero. Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Cc: Andy Zhou Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 04b94281a30b..b891a91577f8 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + if (band->rate == 0) { + err = -EINVAL; + goto exit_free_meter; + } + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); /* Figure out max delta_t that is enough to fill any bucket. * Keep max_delta_t size to the bucket units: * pkts => 1/1000 packets, kilobits => bits. + * + * Start with a full bucket. */ - band_max_delta_t = (band->burst_size + band->rate) * 1000; - /* Start with a full bucket. */ - band->bucket = band_max_delta_t; + band->bucket = (band->burst_size + band->rate) * 1000; + band_max_delta_t = band->bucket / band->rate; if (band_max_delta_t > meter->max_delta_t) meter->max_delta_t = band_max_delta_t; band++; -- cgit v1.2.3 From b7db978ac283b237835129ac87f26cbac94d04e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 6 Feb 2018 09:52:07 +0100 Subject: can: m_can: change comparison to bitshift when dealing with a mask Due to a typo, the mask was destroyed by a comparison instead of a bit shift. Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2594f7779c6f..74170b0d23cf 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -253,7 +253,7 @@ enum m_can_mram_cfg { /* Rx FIFO 0/1 Configuration (RXF0C/RXF1C) */ #define RXFC_FWM_SHIFT 24 -#define RXFC_FWM_MASK (0x7f < RXFC_FWM_SHIFT) +#define RXFC_FWM_MASK (0x7f << RXFC_FWM_SHIFT) #define RXFC_FS_SHIFT 16 #define RXFC_FS_MASK (0x7f << RXFC_FS_SHIFT) -- cgit v1.2.3 From 591d65d5b15496af8d05e252bc1da611c66c0b79 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 5 Mar 2018 21:29:52 +0100 Subject: can: ifi: Check core revision upon probe Older versions of the core are not compatible with the driver due to various intrusive fixes of the core. Read out the VER register, check the core revision bitfield and verify if the core in use is new enough (rev 2.1 or newer) to work correctly with this driver. Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2772d05ff11c..9fd396c3569a 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -136,6 +136,8 @@ #define IFI_CANFD_SYSCLOCK 0x50 #define IFI_CANFD_VER 0x54 +#define IFI_CANFD_VER_REV_MASK 0xff +#define IFI_CANFD_VER_REV_MIN_SUPPORTED 0x15 #define IFI_CANFD_IP_ID 0x58 #define IFI_CANFD_IP_ID_VALUE 0xD073CAFD @@ -933,7 +935,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr; int irq, ret; - u32 id; + u32 id, rev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); addr = devm_ioremap_resource(dev, res); @@ -947,6 +949,13 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) return -EINVAL; } + rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK; + if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED) { + dev_err(dev, "This block is too old (rev %i), minimum supported is rev %i\n", + rev, IFI_CANFD_VER_REV_MIN_SUPPORTED); + return -EINVAL; + } + ndev = alloc_candev(sizeof(*priv), 1); if (!ndev) return -ENOMEM; -- cgit v1.2.3 From 880dd464b4304583c557c4e5f5ecebfd55d232b1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 1 Mar 2018 19:34:00 +0100 Subject: can: ifi: Repair the error handling The new version of the IFI CANFD core has significantly less complex error state indication logic. In particular, the warning/error state bits are no longer all over the place, but are all present in the STATUS register. Moreover, there is a new IRQ register bit indicating transition between error states (active/warning/passive/busoff). This patch makes use of this bit to weed out the obscure selective INTERRUPT register clearing, which was used to carry over the error state indication into the poll function. While at it, this patch fixes the handling of the ACTIVE state, since the hardware provides indication of the core being in ACTIVE state and that in turn fixes the state transition indication toward userspace. Finally, register reads in the poll function are moved to the matching subfunctions since those are also no longer needed in the poll function. Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 64 ++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 9fd396c3569a..fedd927ba6ed 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -30,6 +30,7 @@ #define IFI_CANFD_STCMD_ERROR_ACTIVE BIT(2) #define IFI_CANFD_STCMD_ERROR_PASSIVE BIT(3) #define IFI_CANFD_STCMD_BUSOFF BIT(4) +#define IFI_CANFD_STCMD_ERROR_WARNING BIT(5) #define IFI_CANFD_STCMD_BUSMONITOR BIT(16) #define IFI_CANFD_STCMD_LOOPBACK BIT(18) #define IFI_CANFD_STCMD_DISABLE_CANFD BIT(24) @@ -52,7 +53,10 @@ #define IFI_CANFD_TXSTCMD_OVERFLOW BIT(13) #define IFI_CANFD_INTERRUPT 0xc +#define IFI_CANFD_INTERRUPT_ERROR_BUSOFF BIT(0) #define IFI_CANFD_INTERRUPT_ERROR_WARNING BIT(1) +#define IFI_CANFD_INTERRUPT_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_INTERRUPT_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_INTERRUPT_ERROR_COUNTER BIT(10) #define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY BIT(16) #define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE BIT(22) @@ -61,6 +65,10 @@ #define IFI_CANFD_INTERRUPT_SET_IRQ ((u32)BIT(31)) #define IFI_CANFD_IRQMASK 0x10 +#define IFI_CANFD_IRQMASK_ERROR_BUSOFF BIT(0) +#define IFI_CANFD_IRQMASK_ERROR_WARNING BIT(1) +#define IFI_CANFD_IRQMASK_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_IRQMASK_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_IRQMASK_SET_ERR BIT(7) #define IFI_CANFD_IRQMASK_SET_TS BIT(15) #define IFI_CANFD_IRQMASK_TXFIFO_EMPTY BIT(16) @@ -222,7 +230,10 @@ static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable) if (enable) { enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY | - IFI_CANFD_IRQMASK_RXFIFO_NEMPTY; + IFI_CANFD_IRQMASK_RXFIFO_NEMPTY | + IFI_CANFD_IRQMASK_ERROR_STATE_CHG | + IFI_CANFD_IRQMASK_ERROR_WARNING | + IFI_CANFD_IRQMASK_ERROR_BUSOFF; if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) enirq |= IFI_CANFD_INTERRUPT_ERROR_COUNTER; } @@ -363,12 +374,13 @@ static int ifi_canfd_handle_lost_msg(struct net_device *ndev) return 1; } -static int ifi_canfd_handle_lec_err(struct net_device *ndev, const u32 errctr) +static int ifi_canfd_handle_lec_err(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; + u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); const u32 errmask = IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST | IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST | IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST | @@ -451,6 +463,11 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, switch (new_state) { case CAN_STATE_ERROR_ACTIVE: + /* error active state */ + priv->can.can_stats.error_warning++; + priv->can.state = CAN_STATE_ERROR_ACTIVE; + break; + case CAN_STATE_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; priv->can.state = CAN_STATE_ERROR_WARNING; @@ -479,7 +496,7 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, ifi_canfd_get_berr_counter(ndev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? @@ -512,22 +529,21 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, return 1; } -static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd) +static int ifi_canfd_handle_state_errors(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); int work_done = 0; - u32 isr; - /* - * The ErrWarn condition is a little special, since the bit is - * located in the INTERRUPT register instead of STCMD register. - */ - isr = readl(priv->base + IFI_CANFD_INTERRUPT); - if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) && + if ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) && + (priv->can.state != CAN_STATE_ERROR_ACTIVE)) { + netdev_dbg(ndev, "Error, entered active state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_ERROR_ACTIVE); + } + + if ((stcmd & IFI_CANFD_STCMD_ERROR_WARNING) && (priv->can.state != CAN_STATE_ERROR_WARNING)) { - /* Clear the interrupt */ - writel(IFI_CANFD_INTERRUPT_ERROR_WARNING, - priv->base + IFI_CANFD_INTERRUPT); netdev_dbg(ndev, "Error, entered warning state\n"); work_done += ifi_canfd_handle_state_change(ndev, CAN_STATE_ERROR_WARNING); @@ -554,18 +570,11 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) { struct net_device *ndev = napi->dev; struct ifi_canfd_priv *priv = netdev_priv(ndev); - const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE | - IFI_CANFD_STCMD_BUSOFF; - int work_done = 0; - - u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD); - u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); + int work_done = 0; /* Handle bus state changes */ - if ((stcmd & stcmd_state_mask) || - ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0)) - work_done += ifi_canfd_handle_state_errors(ndev, stcmd); + work_done += ifi_canfd_handle_state_errors(ndev); /* Handle lost messages on RX */ if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW) @@ -573,7 +582,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) /* Handle lec errors on the bus */ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) - work_done += ifi_canfd_handle_lec_err(ndev, errctr); + work_done += ifi_canfd_handle_lec_err(ndev); /* Handle normal messages on RX */ if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY)) @@ -594,12 +603,13 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id) struct net_device_stats *stats = &ndev->stats; const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY | IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER | + IFI_CANFD_INTERRUPT_ERROR_COUNTER | + IFI_CANFD_INTERRUPT_ERROR_STATE_CHG | IFI_CANFD_INTERRUPT_ERROR_WARNING | - IFI_CANFD_INTERRUPT_ERROR_COUNTER; + IFI_CANFD_INTERRUPT_ERROR_BUSOFF; const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY | IFI_CANFD_INTERRUPT_TXFIFO_REMOVE; - const u32 clr_irq_mask = ~((u32)(IFI_CANFD_INTERRUPT_SET_IRQ | - IFI_CANFD_INTERRUPT_ERROR_WARNING)); + const u32 clr_irq_mask = ~((u32)IFI_CANFD_INTERRUPT_SET_IRQ); u32 isr; isr = readl(priv->base + IFI_CANFD_INTERRUPT); -- cgit v1.2.3 From e6048a00cfd0863d32f53b226e0b9a3633fc3332 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 8 Mar 2018 09:30:28 +0100 Subject: can: peak/pcie_fd: fix echo_skb is occupied! bug This patch makes atomic the handling of the linux-can echo_skb array and the network tx queue. This prevents from the "BUG! echo_skb is occupied!" message to be printed by the linux-can core, in SMP environments. Reported-by: Diana Burgess Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_canfd.c | 12 ++++++------ drivers/net/can/peak_canfd/peak_pciefd_main.c | 8 ++++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 55513411a82e..6fa66231ed8e 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -262,7 +262,6 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, spin_lock_irqsave(&priv->echo_lock, flags); can_get_echo_skb(priv->ndev, msg->client); - spin_unlock_irqrestore(&priv->echo_lock, flags); /* count bytes of the echo instead of skb */ stats->tx_bytes += cf_len; @@ -271,6 +270,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, /* restart tx queue (a slot is free) */ netif_wake_queue(priv->ndev); + spin_unlock_irqrestore(&priv->echo_lock, flags); return 0; } @@ -726,11 +726,6 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, */ should_stop_tx_queue = !!(priv->can.echo_skb[priv->echo_idx]); - spin_unlock_irqrestore(&priv->echo_lock, flags); - - /* write the skb on the interface */ - priv->write_tx_msg(priv, msg); - /* stop network tx queue if not enough room to save one more msg too */ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) should_stop_tx_queue |= (room_left < @@ -742,6 +737,11 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, if (should_stop_tx_queue) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->echo_lock, flags); + + /* write the skb on the interface */ + priv->write_tx_msg(priv, msg); + return NETDEV_TX_OK; } diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index 788c3464a3b0..3c51a884db87 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -349,8 +349,12 @@ static irqreturn_t pciefd_irq_handler(int irq, void *arg) priv->tx_pages_free++; spin_unlock_irqrestore(&priv->tx_lock, flags); - /* wake producer up */ - netif_wake_queue(priv->ucan.ndev); + /* wake producer up (only if enough room in echo_skb array) */ + spin_lock_irqsave(&priv->ucan.echo_lock, flags); + if (!priv->ucan.can.echo_skb[priv->ucan.echo_idx]) + netif_wake_queue(priv->ucan.ndev); + + spin_unlock_irqrestore(&priv->ucan.echo_lock, flags); } /* re-enable Rx DMA transfer for this CAN */ -- cgit v1.2.3 From ffd137f7043cb30067e1bff6fe62a073ae190b23 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 8 Mar 2018 09:30:29 +0100 Subject: can: peak/pcie_fd: remove useless code when interface starts When an interface starts, the echo_skb array is empty and the network queue should be started only. This patch replaces useless code and locks when the internal RX_BARRIER message is received from the IP core, telling the driver that tx may start. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_canfd.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 6fa66231ed8e..ed8561d4a90f 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -333,7 +333,6 @@ static int pucan_handle_status(struct peak_canfd_priv *priv, /* this STATUS is the CNF of the RX_BARRIER: Tx path can be setup */ if (pucan_status_is_rx_barrier(msg)) { - unsigned long flags; if (priv->enable_tx_path) { int err = priv->enable_tx_path(priv); @@ -342,16 +341,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv, return err; } - /* restart network queue only if echo skb array is free */ - spin_lock_irqsave(&priv->echo_lock, flags); - - if (!priv->can.echo_skb[priv->echo_idx]) { - spin_unlock_irqrestore(&priv->echo_lock, flags); - - netif_wake_queue(ndev); - } else { - spin_unlock_irqrestore(&priv->echo_lock, flags); - } + /* start network queue (echo_skb array is empty) */ + netif_start_queue(ndev); return 0; } -- cgit v1.2.3 From 7035046d6d02d455325c5d0328bcc8a0b5dd96b0 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 10 Mar 2018 12:05:11 +0100 Subject: drm/sun4i: Fix exclusivity of the TCON clocks Currently the exclusivity is enabled when the rate is set by the mode setting functions. These functions are called by mode_set_nofb callback of drm_crc_helper. Then exclusivity is disabled when tcon is disabled by atomic_disable callback. What happens is that mode_set_nofb can be called once when mode changes, and afterwards the system can call atomic_enable and atomic_disable multiple times without further calls to mode_set_nofb. This happens: mode_set_nofb - clk exclusivity is enabled atomic_enable atomic_disable - clk exclusivity is disabled atomic_enable atomic_disable - clk exclusivity is already disabled, leading to WARN in clk_rate_exclusive_put Solution is to enable exclusivity in sun4i_tcon_channel_set_status. Signed-off-by: Ondrej Jirman Cc: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180310110511.14697-1-megous@megous.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 2de586b7c98b..a818ca491605 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -103,6 +103,7 @@ static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel, if (enabled) { clk_prepare_enable(clk); + clk_rate_exclusive_get(clk); } else { clk_rate_exclusive_put(clk); clk_disable_unprepare(clk); @@ -262,7 +263,7 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon, const struct drm_display_mode *mode) { /* Configure the dot clock */ - clk_set_rate_exclusive(tcon->dclk, mode->crtc_clock * 1000); + clk_set_rate(tcon->dclk, mode->crtc_clock * 1000); /* Set the resolution */ regmap_write(tcon->regs, SUN4I_TCON0_BASIC0_REG, @@ -423,7 +424,7 @@ static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, WARN_ON(!tcon->quirks->has_channel_1); /* Configure the dot clock */ - clk_set_rate_exclusive(tcon->sclk1, mode->crtc_clock * 1000); + clk_set_rate(tcon->sclk1, mode->crtc_clock * 1000); /* Adjust clock delay */ clk_delay = sun4i_tcon_get_clk_delay(mode, 1); -- cgit v1.2.3 From c9b3bce18da4a0aebc27853052dea39aa64b7d75 Mon Sep 17 00:00:00 2001 From: Bich HEMON Date: Mon, 12 Mar 2018 08:52:37 +0000 Subject: can: m_can: select pinctrl state in each suspend/resume function Make sure to apply the correct pin state in suspend/resume callbacks. Putting pins in sleep state saves power. Signed-off-by: Bich Hemon Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 74170b0d23cf..b397a33f3d32 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -26,6 +26,7 @@ #include #include #include +#include /* napi related */ #define M_CAN_NAPI_WEIGHT 64 @@ -1700,6 +1701,8 @@ static __maybe_unused int m_can_suspend(struct device *dev) m_can_clk_stop(priv); } + pinctrl_pm_select_sleep_state(dev); + priv->can.state = CAN_STATE_SLEEPING; return 0; @@ -1710,6 +1713,8 @@ static __maybe_unused int m_can_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct m_can_priv *priv = netdev_priv(ndev); + pinctrl_pm_select_default_state(dev); + m_can_init_ram(priv); priv->can.state = CAN_STATE_ERROR_ACTIVE; -- cgit v1.2.3 From 59fba0869acae06ff594dd7e9808ed673f53538a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:35:43 +0100 Subject: phy: qcom-ufs: add MODULE_LICENSE tag While the specific UFS PHY drivers (14nm and 20nm) have a module license, the common base module does not, leading to a Kbuild failure: WARNING: modpost: missing MODULE_LICENSE() in drivers/phy/qualcomm/phy-qcom-ufs.o FATAL: modpost: GPL-incompatible module phy-qcom-ufs.ko uses GPL-only symbol 'clk_enable' This adds a module description and license tag to fix the build. I added both Yaniv and Vivek as authors here, as Yaniv sent the initial submission, while Vivek did most of the work since. Signed-off-by: Arnd Bergmann Acked-by: Bjorn Andersson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/qualcomm/phy-qcom-ufs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-ufs.c b/drivers/phy/qualcomm/phy-qcom-ufs.c index c5ff4525edef..c5493ea51282 100644 --- a/drivers/phy/qualcomm/phy-qcom-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-ufs.c @@ -675,3 +675,8 @@ int ufs_qcom_phy_power_off(struct phy *generic_phy) return 0; } EXPORT_SYMBOL_GPL(ufs_qcom_phy_power_off); + +MODULE_AUTHOR("Yaniv Gardi "); +MODULE_AUTHOR("Vivek Gautam "); +MODULE_DESCRIPTION("Universal Flash Storage (UFS) QCOM PHY"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 9ca8614980d4593dcff649f8aefe604079cfafe1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 12 Mar 2018 00:19:09 +0100 Subject: drm/sun4i: Fix an error handling path in 'sun4i_drv_bind()' Commit 070badfab767 ("drm/sun4i: call drm_vblank_init with correct number of crtcs") has moved some code without updating the error handling gotos accordingly. Branch to the correct label and remove a now unused lablel. Fixes: 070badfab767 ("drm/sun4i: call drm_vblank_init with correct number of crtcs") Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180311231909.5381-1-christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/sun4i/sun4i_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 4570da0227b4..d9a71f361b14 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -111,7 +111,7 @@ static int sun4i_drv_bind(struct device *dev) /* drm_vblank_init calls kcalloc, which can fail */ ret = drm_vblank_init(drm, drm->mode_config.num_crtc); if (ret) - goto free_mem_region; + goto cleanup_mode_config; drm->irq_enabled = true; @@ -139,7 +139,6 @@ finish_poll: sun4i_framebuffer_free(drm); cleanup_mode_config: drm_mode_config_cleanup(drm); -free_mem_region: of_reserved_mem_device_release(dev); free_drm: drm_dev_unref(drm); -- cgit v1.2.3 From 1da961d72ab0cfbe8b7c26cba731dc2bb6b9494b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:49 +0300 Subject: x86/cpufeatures: Add Intel Total Memory Encryption cpufeature CPUID.0x7.0x0:ECX[13] indicates whether CPU supports Intel Total Memory Encryption. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f41079da38c5..16898eb813f5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,6 +316,7 @@ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -- cgit v1.2.3 From 7958b2246fadf54b7ff820a2a5a2c5ca1554716f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:51 +0300 Subject: x86/cpufeatures: Add Intel PCONFIG cpufeature CPUID.0x7.0x0:EDX[18] indicates whether Intel CPU support PCONFIG instruction. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 16898eb813f5..d554c11e01ff 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -329,6 +329,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -- cgit v1.2.3 From 40088dc4e1ead7df31728c73f5b51d71da18831d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 12 Mar 2018 13:55:48 +0100 Subject: ALSA: hda - Revert power_save option default value With the commit 1ba8f9d30817 ("ALSA: hda: Add a power_save blacklist"), we changed the default value of power_save option to -1 for processing the power-save blacklist. Unfortunately, this seems breaking user-space applications that actually read the power_save parameter value via sysfs and judge / adjust the power-saving status. They see the value -1 as if the power-save is turned off, although the actual value is taken from CONFIG_SND_HDA_POWER_SAVE_DEFAULT and it can be a positive. So, overall, passing -1 there was no good idea. Let's partially revert it -- at least for power_save option default value is restored again to CONFIG_SND_HDA_POWER_SAVE_DEFAULT. Meanwhile, in this patch, we keep the blacklist behavior and make is adjustable via the new option, pm_blacklist. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199073 Fixes: 1ba8f9d30817 ("ALSA: hda: Add a power_save blacklist") Acked-by: Hans de Goede Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 96143df19b21..d5017adf9feb 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -181,11 +181,15 @@ static const struct kernel_param_ops param_ops_xint = { }; #define param_check_xint param_check_int -static int power_save = -1; +static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; module_param(power_save, xint, 0644); MODULE_PARM_DESC(power_save, "Automatic power-saving timeout " "(in second, 0 = disable)."); +static bool pm_blacklist = true; +module_param(pm_blacklist, bool, 0644); +MODULE_PARM_DESC(pm_blacklist, "Enable power-management blacklist"); + /* reset the HD-audio controller in power save mode. * this may give more power-saving, but will take longer time to * wake up. @@ -2300,10 +2304,9 @@ static int azx_probe_continue(struct azx *chip) val = power_save; #ifdef CONFIG_PM - if (val == -1) { + if (pm_blacklist) { const struct snd_pci_quirk *q; - val = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist); if (q && val) { dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n", -- cgit v1.2.3 From a2c054a896b8ac794ddcfc7c92e2dc7ec4ed4ed5 Mon Sep 17 00:00:00 2001 From: Brad Mouring Date: Thu, 8 Mar 2018 16:23:03 -0600 Subject: net: phy: Tell caller result of phy_change() In 664fcf123a30e (net: phy: Threaded interrupts allow some simplification) the phy_interrupt system was changed to use a traditional threaded interrupt scheme instead of a workqueue approach. With this change, the phy status check moved into phy_change, which did not report back to the caller whether or not the interrupt was handled. This means that, in the case of a shared phy interrupt, only the first phydev's interrupt registers are checked (since phy_interrupt() would always return IRQ_HANDLED). This leads to interrupt storms when it is a secondary device that's actually the interrupt source. Signed-off-by: Brad Mouring Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 145 +++++++++++++++++++++++++------------------------- include/linux/phy.h | 1 - 2 files changed, 72 insertions(+), 74 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a6f924fee584..9aabfa1a455a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -617,6 +617,77 @@ static void phy_error(struct phy_device *phydev) phy_trigger_machine(phydev, false); } +/** + * phy_disable_interrupts - Disable the PHY interrupts from the PHY side + * @phydev: target phy_device struct + */ +static int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} + +/** + * phy_change - Called by the phy_interrupt to handle PHY changes + * @phydev: phy_device struct that interrupted + */ +static irqreturn_t phy_change(struct phy_device *phydev) +{ + if (phy_interrupt_is_valid(phydev)) { + if (phydev->drv->did_interrupt && + !phydev->drv->did_interrupt(phydev)) + return IRQ_NONE; + + if (phydev->state == PHY_HALTED) + if (phy_disable_interrupts(phydev)) + goto phy_err; + } + + mutex_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + mutex_unlock(&phydev->lock); + + /* reschedule state queue work to run as soon as possible */ + phy_trigger_machine(phydev, true); + + if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev)) + goto phy_err; + return IRQ_HANDLED; + +phy_err: + phy_error(phydev); + return IRQ_NONE; +} + +/** + * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes + * @work: work_struct that describes the work to be done + */ +void phy_change_work(struct work_struct *work) +{ + struct phy_device *phydev = + container_of(work, struct phy_device, phy_queue); + + phy_change(phydev); +} + /** * phy_interrupt - PHY interrupt handler * @irq: interrupt line @@ -632,9 +703,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (PHY_HALTED == phydev->state) return IRQ_NONE; /* It can't be ours. */ - phy_change(phydev); - - return IRQ_HANDLED; + return phy_change(phydev); } /** @@ -651,32 +720,6 @@ static int phy_enable_interrupts(struct phy_device *phydev) return phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); } -/** - * phy_disable_interrupts - Disable the PHY interrupts from the PHY side - * @phydev: target phy_device struct - */ -static int phy_disable_interrupts(struct phy_device *phydev) -{ - int err; - - /* Disable PHY interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - if (err) - goto phy_err; - - /* Clear the interrupt */ - err = phy_clear_interrupt(phydev); - if (err) - goto phy_err; - - return 0; - -phy_err: - phy_error(phydev); - - return err; -} - /** * phy_start_interrupts - request and enable interrupts for a PHY device * @phydev: target phy_device struct @@ -719,50 +762,6 @@ int phy_stop_interrupts(struct phy_device *phydev) } EXPORT_SYMBOL(phy_stop_interrupts); -/** - * phy_change - Called by the phy_interrupt to handle PHY changes - * @phydev: phy_device struct that interrupted - */ -void phy_change(struct phy_device *phydev) -{ - if (phy_interrupt_is_valid(phydev)) { - if (phydev->drv->did_interrupt && - !phydev->drv->did_interrupt(phydev)) - return; - - if (phydev->state == PHY_HALTED) - if (phy_disable_interrupts(phydev)) - goto phy_err; - } - - mutex_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - mutex_unlock(&phydev->lock); - - /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev, true); - - if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev)) - goto phy_err; - return; - -phy_err: - phy_error(phydev); -} - -/** - * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes - * @work: work_struct that describes the work to be done - */ -void phy_change_work(struct work_struct *work) -{ - struct phy_device *phydev = - container_of(work, struct phy_device, phy_queue); - - phy_change(phydev); -} - /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index d7069539f351..b260fb336b25 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1012,7 +1012,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct phy_device *phydev); void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); -- cgit v1.2.3 From 4ed50ef4da4d113fe65d9f9d049c1ce7468e3ac1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:03 -0500 Subject: bnxt_en: Refactor the functions to reserve hardware rings. The bnxt_hwrm_reserve_{pf|vf}_rings() functions are very similar to the bnxt_hwrm_check_{pf|vf}_rings() functions. Refactor the former so that the latter can make use of common code in the next patch. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++++++++++++++++------------ 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1500243b9886..377fcebadffc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4558,18 +4558,17 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings) return rc; } -static int -bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings, int vnics) +static void +__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, + int tx_rings, int rx_rings, int ring_grps, + int cp_rings, int vnics) { - struct hwrm_func_cfg_input req = {0}; u32 enables = 0; - int rc; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); - req.fid = cpu_to_le16(0xffff); + bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1); + req->fid = cpu_to_le16(0xffff); enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; - req.num_tx_rings = cpu_to_le16(tx_rings); + req->num_tx_rings = cpu_to_le16(tx_rings); if (bp->flags & BNXT_FLAG_NEW_RM) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | @@ -4578,16 +4577,53 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_stat_ctxs = req.num_cmpl_rings; - req.num_vnics = cpu_to_le16(vnics); + req->num_rx_rings = cpu_to_le16(rx_rings); + req->num_hw_ring_grps = cpu_to_le16(ring_grps); + req->num_cmpl_rings = cpu_to_le16(cp_rings); + req->num_stat_ctxs = req->num_cmpl_rings; + req->num_vnics = cpu_to_le16(vnics); } - if (!enables) + req->enables = cpu_to_le32(enables); +} + +static void +__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, + struct hwrm_func_vf_cfg_input *req, int tx_rings, + int rx_rings, int ring_grps, int cp_rings, + int vnics) +{ + u32 enables = 0; + + bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1); + enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; + enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | + FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; + enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; + + req->num_tx_rings = cpu_to_le16(tx_rings); + req->num_rx_rings = cpu_to_le16(rx_rings); + req->num_hw_ring_grps = cpu_to_le16(ring_grps); + req->num_cmpl_rings = cpu_to_le16(cp_rings); + req->num_stat_ctxs = req->num_cmpl_rings; + req->num_vnics = cpu_to_le16(vnics); + + req->enables = cpu_to_le32(enables); +} + +static int +bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, + int ring_grps, int cp_rings, int vnics) +{ + struct hwrm_func_cfg_input req = {0}; + int rc; + + __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); + if (!req.enables) return 0; - req.enables = cpu_to_le32(enables); rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4604,7 +4640,6 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, int ring_grps, int cp_rings, int vnics) { struct hwrm_func_vf_cfg_input req = {0}; - u32 enables = 0; int rc; if (!(bp->flags & BNXT_FLAG_NEW_RM)) { @@ -4612,22 +4647,8 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, return 0; } - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); - enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; - enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; - enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; - enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; - - req.num_tx_rings = cpu_to_le16(tx_rings); - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_stat_ctxs = req.num_cmpl_rings; - req.num_vnics = cpu_to_le16(vnics); - - req.enables = cpu_to_le32(enables); + __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; -- cgit v1.2.3 From 6fc2ffdf1001ae4fb485b3ba95ff757ae54565c9 Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Fri, 9 Mar 2018 23:46:04 -0500 Subject: bnxt_en: Fix vnic accounting in the bnxt_check_rings() path. The number of vnics to check must be determined ahead of time because only standard RX rings require vnics to support RFS. The logic is similar to the ring reservation logic and we can now use the refactored common functions to do most of the work in setting up the firmware message. Fixes: 8f23d638b36b ("bnxt_en: Expand bnxt_check_rings() to check all resources.") Signed-off-by: Eddie Wai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 64 ++++++++++--------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 377fcebadffc..b847d54504ed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4764,39 +4764,25 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) } static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { struct hwrm_func_vf_cfg_input req = {0}; - u32 flags, enables; + u32 flags; int rc; if (!(bp->flags & BNXT_FLAG_NEW_RM)) return 0; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); + __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - enables = FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS | - FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS; req.flags = cpu_to_le32(flags); - req.enables = cpu_to_le32(enables); - req.num_tx_rings = cpu_to_le16(tx_rings); - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_stat_ctxs = cpu_to_le16(cp_rings); - req.num_vnics = cpu_to_le16(1); - if (bp->flags & BNXT_FLAG_RFS) - req.num_vnics = cpu_to_le16(rx_rings + 1); rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4804,38 +4790,23 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, } static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { struct hwrm_func_cfg_input req = {0}; - u32 flags, enables; + u32 flags; int rc; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); - req.fid = cpu_to_le16(0xffff); + __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST; - enables = FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS; - req.num_tx_rings = cpu_to_le16(tx_rings); - if (bp->flags & BNXT_FLAG_NEW_RM) { + if (bp->flags & BNXT_FLAG_NEW_RM) flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - enables |= FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS | - FUNC_CFG_REQ_ENABLES_NUM_VNICS; - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_stat_ctxs = cpu_to_le16(cp_rings); - req.num_vnics = cpu_to_le16(1); - if (bp->flags & BNXT_FLAG_RFS) - req.num_vnics = cpu_to_le16(rx_rings + 1); - } + req.flags = cpu_to_le32(flags); - req.enables = cpu_to_le32(enables); rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4843,17 +4814,17 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, } static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { if (bp->hwrm_spec_code < 0x10801) return 0; if (BNXT_PF(bp)) return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings, - ring_grps, cp_rings); + ring_grps, cp_rings, vnics); return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps, - cp_rings); + cp_rings, vnics); } static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, @@ -7552,7 +7523,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int max_rx, max_tx, tx_sets = 1; int tx_rings_needed; int rx_rings = rx; - int cp, rc; + int cp, vnics, rc; if (tcs) tx_sets = tcs; @@ -7568,10 +7539,15 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, if (max_tx < tx_rings_needed) return -ENOMEM; + vnics = 1; + if (bp->flags & BNXT_FLAG_RFS) + vnics += rx_rings; + if (bp->flags & BNXT_FLAG_AGG_RINGS) rx_rings <<= 1; cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx; - return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp); + return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp, + vnics); } static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev) -- cgit v1.2.3 From b9ecc3400bc418af3ba9e56ea852f4ad69c23454 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 9 Mar 2018 23:46:05 -0500 Subject: bnxt_en: Remove unwanted ovs-offload messages in some conditions In some conditions when the driver fails to add a flow in HW and returns an error back to the stack, the stack continues to invoke get_flow_stats() and/or del_flow() on it. The driver fails these APIs with an error message "no flow_node for cookie". The message gets logged repeatedly as long as the stack keeps invoking these functions. Fix this by removing the corresponding netdev_info() calls from these functions. Fixes: d7bc73053024 ("bnxt_en: add code to query TC flower offload stats") Signed-off-by: Sriharsha Basavapatna Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index fbe6e208e17b..2049d4356df4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1269,11 +1269,8 @@ static int bnxt_tc_del_flow(struct bnxt *bp, flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, tc_info->flow_ht_params); - if (!flow_node) { - netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx", - tc_flow_cmd->cookie); + if (!flow_node) return -EINVAL; - } return __bnxt_tc_del_flow(bp, flow_node); } @@ -1290,11 +1287,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, tc_info->flow_ht_params); - if (!flow_node) { - netdev_info(bp->dev, "Error: no flow_node for cookie %lx", - tc_flow_cmd->cookie); + if (!flow_node) return -1; - } flow = &flow_node->flow; curr_stats = &flow->stats; -- cgit v1.2.3 From ed7bc602f60a653e5dea488e6917d9a75d6ac0dd Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:06 -0500 Subject: bnxt_en: Pass complete VLAN TCI to the stack. When receiving a packet with VLAN tag, pass the entire 16-bit TCI to the stack when calling __vlan_hwaccel_put_tag(). The current code is only passing the 12-bit tag and it is missing the priority bits. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b847d54504ed..35099c83f847 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1439,7 +1439,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vlan_proto = tpa_info->metadata >> RX_CMP_FLAGS2_METADATA_TPID_SFT; - u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK; + u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } @@ -1623,7 +1623,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); - u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK; + u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 1989c470172c..5e3d62189cab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -189,6 +189,7 @@ struct rx_cmp_ext { #define RX_CMP_FLAGS2_T_L4_CS_CALC (0x1 << 3) #define RX_CMP_FLAGS2_META_FORMAT_VLAN (0x1 << 4) __le32 rx_cmp_meta_data; + #define RX_CMP_FLAGS2_METADATA_TCI_MASK 0xffff #define RX_CMP_FLAGS2_METADATA_VID_MASK 0xfff #define RX_CMP_FLAGS2_METADATA_TPID_MASK 0xffff0000 #define RX_CMP_FLAGS2_METADATA_TPID_SFT 16 -- cgit v1.2.3 From 832aed16ce7af2a43dafe9d4bc9080322e042cde Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:07 -0500 Subject: bnxt_en: Fix regressions when setting up MQPRIO TX rings. Recent changes added the bnxt_init_int_mode() call in the driver's open path whenever ring reservations are changed. This call was previously only called in the probe path. In the open path, if MQPRIO TC has been setup, the bnxt_init_int_mode() call would reset and mess up the MQPRIO per TC rings. Fix it by not re-initilizing bp->tx_nr_rings_per_tc in bnxt_init_int_mode(). Instead, initialize it in the probe path only after the bnxt_init_int_mode() call. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 35099c83f847..cbdb54f61855 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5857,7 +5857,6 @@ static int bnxt_init_msix(struct bnxt *bp) if (rc) goto msix_setup_exit; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; bp->cp_nr_rings = (min == 1) ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : bp->tx_nr_rings + bp->rx_nr_rings; @@ -5889,7 +5888,6 @@ static int bnxt_init_inta(struct bnxt *bp) bp->rx_nr_rings = 1; bp->tx_nr_rings = 1; bp->cp_nr_rings = 1; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; bp->flags |= BNXT_FLAG_SHARED_RINGS; bp->irq_tbl[0].vector = bp->pdev->irq; return 0; @@ -8661,6 +8659,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + /* No TC has been set yet and rings may have been trimmed due to + * limited MSIX, so we re-initialize the TX rings per TC. + */ + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bnxt_get_wol_settings(bp); if (bp->flags & BNXT_FLAG_WOL_CAP) device_set_wakeup_enable(&pdev->dev, bp->wol); -- cgit v1.2.3 From 6ae777eab2f53b50d84a5d75a48d2d149f787da8 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 9 Mar 2018 23:46:08 -0500 Subject: bnxt_en: Return standard Linux error codes for hwrm flow cmds. Currently, internal error value is returned by the driver, when hwrm_cfa_flow_alloc() fails due lack of resources. We should be returning Linux errno value -ENOSPC instead. This patch also converts other similar command errors to standard Linux errno code (-EIO) in bnxt_tc.c Fixes: db1d36a27324 ("bnxt_en: add TC flower offload flow_alloc/free FW cmds") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 2049d4356df4..65c2cee35766 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -349,6 +349,9 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle) if (rc) netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d", __func__, flow_handle, rc); + + if (rc) + rc = -EIO; return rc; } @@ -484,13 +487,15 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.action_flags = cpu_to_le16(action_flags); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) *flow_handle = resp->flow_handle; - mutex_unlock(&bp->hwrm_cmd_lock); + if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR) + rc = -ENOSPC; + else if (rc) + rc = -EIO; return rc; } @@ -561,6 +566,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); mutex_unlock(&bp->hwrm_cmd_lock); + if (rc) + rc = -EIO; return rc; } @@ -576,6 +583,9 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + + if (rc) + rc = -EIO; return rc; } @@ -624,6 +634,8 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); mutex_unlock(&bp->hwrm_cmd_lock); + if (rc) + rc = -EIO; return rc; } @@ -639,6 +651,9 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + + if (rc) + rc = -EIO; return rc; } @@ -1338,8 +1353,10 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, } else { netdev_info(bp->dev, "error rc=%d", rc); } - mutex_unlock(&bp->hwrm_cmd_lock); + + if (rc) + rc = -EIO; return rc; } -- cgit v1.2.3 From 1a037782e79047ec3386d8ba94c103cbdfb851d0 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 9 Mar 2018 23:46:09 -0500 Subject: bnxt_en: close & open NIC, only when the interface is in running state. bnxt_restore_pf_fw_resources routine frees PF resources by calling close_nic and allocates the resources back, by doing open_nic. However, this is not needed, if the PF is already in closed state. This bug causes the driver to call open the device and call request_irq() when it is not needed. Ultimately, pci_disable_msix() will crash when bnxt_en is unloaded. This patch fixes the problem by skipping __bnxt_close_nic and __bnxt_open_nic inside bnxt_restore_pf_fw_resources routine, if the interface is not running. Fixes: 80fcaf46c092 ("bnxt_en: Restore MSIX after disabling SRIOV.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cbdb54f61855..cc9569f474f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8432,13 +8432,20 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp) return 0; bnxt_hwrm_func_qcaps(bp); - __bnxt_close_nic(bp, true, false); + + if (netif_running(bp->dev)) + __bnxt_close_nic(bp, true, false); + bnxt_clear_int_mode(bp); rc = bnxt_init_int_mode(bp); - if (rc) - dev_close(bp->dev); - else - rc = bnxt_open_nic(bp, true, false); + + if (netif_running(bp->dev)) { + if (rc) + dev_close(bp->dev); + else + rc = bnxt_open_nic(bp, true, false); + } + return rc; } -- cgit v1.2.3 From 3c4fe80b32c685bdc02b280814d0cfe80d441c72 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:10 -0500 Subject: bnxt_en: Check valid VNIC ID in bnxt_hwrm_vnic_set_tpa(). During initialization, if we encounter errors, there is a code path that calls bnxt_hwrm_vnic_set_tpa() with invalid VNIC ID. This may cause a warning in firmware logs. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cc9569f474f5..c7e5e6f09647 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3847,6 +3847,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_tpa_cfg_input req = {0}; + if (vnic->fw_vnic_id == INVALID_HW_RING_ID) + return 0; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1); if (tpa_flags) { -- cgit v1.2.3 From bf2ae2e4bf9360e07c0cdfa166bcdc0afd92f4ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Mar 2018 18:57:50 +0800 Subject: sock_diag: request _diag module only when the family or proto has been registered Now when using 'ss' in iproute, kernel would try to load all _diag modules, which also causes corresponding family and proto modules to be loaded as well due to module dependencies. Like after running 'ss', sctp, dccp, af_packet (if it works as a module) would be loaded. For example: $ lsmod|grep sctp $ ss $ lsmod|grep sctp sctp_diag 16384 0 sctp 323584 5 sctp_diag inet_diag 24576 4 raw_diag,tcp_diag,sctp_diag,udp_diag libcrc32c 16384 3 nf_conntrack,nf_nat,sctp As these family and proto modules are loaded unintentionally, it could cause some problems, like: - Some debug tools use 'ss' to collect the socket info, which loads all those diag and family and protocol modules. It's noisy for identifying issues. - Users usually expect to drop sctp init packet silently when they have no sense of sctp protocol instead of sending abort back. - It wastes resources (especially with multiple netns), and SCTP module can't be unloaded once it's loaded. ... In short, it's really inappropriate to have these family and proto modules loaded unexpectedly when just doing debugging with inet_diag. This patch is to introduce sock_load_diag_module() where it loads the _diag module only when it's corresponding family or proto has been already registered. Note that we can't just load _diag module without the family or proto loaded, as some symbols used in _diag module are from the family or proto module. v1->v2: - move inet proto check to inet_diag to avoid a compiling err. v2->v3: - define sock_load_diag_module in sock.c and export one symbol only. - improve the changelog. Reported-by: Sabrina Dubroca Acked-by: Marcelo Ricardo Leitner Acked-by: Phil Sutter Acked-by: Sabrina Dubroca Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/net.h | 1 + include/net/sock.h | 1 + net/core/sock.c | 21 +++++++++++++++++++++ net/core/sock_diag.c | 12 ++++-------- net/ipv4/inet_diag.c | 3 +-- net/socket.c | 5 +++++ 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 91216b16feb7..2a0391eea05c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -222,6 +222,7 @@ enum { int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); +bool sock_is_registered(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); diff --git a/include/net/sock.h b/include/net/sock.h index 169c92afcafa..ae23f3b389ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1137,6 +1137,7 @@ struct proto { int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); +int sock_load_diag_module(int family, int protocol); #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index c501499a04fe..85b0b64e7f9d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3261,6 +3261,27 @@ void proto_unregister(struct proto *prot) } EXPORT_SYMBOL(proto_unregister); +int sock_load_diag_module(int family, int protocol) +{ + if (!protocol) { + if (!sock_is_registered(family)) + return -ENOENT; + + return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family); + } + +#ifdef CONFIG_INET + if (family == AF_INET && + !rcu_access_pointer(inet_protos[protocol])) + return -ENOENT; +#endif + + return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family, protocol); +} +EXPORT_SYMBOL(sock_load_diag_module); + #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) __acquires(proto_list_mutex) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 146b50e30659..c37b5be7c5e4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (sock_diag_handlers[req->sdiag_family] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, req->sdiag_family); + sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); hndl = sock_diag_handlers[req->sdiag_family]; @@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, case TCPDIAG_GETSOCK: case DCCPDIAG_GETSOCK: if (inet_rcv_compat == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); mutex_lock(&sock_diag_table_mutex); if (inet_rcv_compat != NULL) @@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: if (!sock_diag_handlers[AF_INET]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET6); + sock_load_diag_module(AF_INET6, 0); break; } return 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a383f299ce24..4e5bc4b2f14e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { if (!inet_diag_table[proto]) - request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET, proto); + sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[proto]) diff --git a/net/socket.c b/net/socket.c index a93c99b518ca..08847c3b8c39 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2587,6 +2587,11 @@ void sock_unregister(int family) } EXPORT_SYMBOL(sock_unregister); +bool sock_is_registered(int family) +{ + return family < NPROTO && rcu_access_pointer(net_families[family]); +} + static int __init sock_init(void) { int err; -- cgit v1.2.3 From b6b76dd62c56f257cdd30900ed22668c42a74030 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 12 Mar 2018 19:00:49 +0900 Subject: error-injection: Fix to prohibit jump optimization Since the kprobe which was optimized by jump can not change the execution path, the kprobe for error-injection must not be optimized. To prohibit it, set a dummy post-handler as officially stated in Documentation/kprobes.txt. Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework") Signed-off-by: Masami Hiramatsu Signed-off-by: Daniel Borkmann --- kernel/fail_function.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 21b0122cb39c..1d5632d8bbcc 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -14,6 +14,15 @@ static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs); +static void fei_post_handler(struct kprobe *kp, struct pt_regs *regs, + unsigned long flags) +{ + /* + * A dummy post handler is required to prohibit optimizing, because + * jump optimization does not support execution path overriding. + */ +} + struct fei_attr { struct list_head list; struct kprobe kp; @@ -56,6 +65,7 @@ static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr) return NULL; } attr->kp.pre_handler = fei_kprobe_handler; + attr->kp.post_handler = fei_post_handler; attr->retval = adjust_error_retval(addr, 0); INIT_LIST_HEAD(&attr->list); } -- cgit v1.2.3 From 99652a469df19086d594e8e89757d4081a812789 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:36 +0100 Subject: clk: migrate the count of orphaned clocks at init The orphan clocks reparents should migrate any existing count from the orphan clock to its new acestor clocks, otherwise we may have inconsistent counts in the tree and end-up with gated critical clocks Assuming we have two clocks, A and B. * Clock A has CLK_IS_CRITICAL flag set. * Clock B is an ancestor of A which can gate. Clock B gate is left enabled by the bootloader. Step 1: Clock A is registered. Since it is a critical clock, it is enabled. The clock being still an orphan, no parent are enabled. Step 2: Clock B is registered and reparented to clock A (potentially through several other clocks). We are now in situation where the enable count of clock A is 1 while the enable count of its ancestors is 0, which is not good. Step 3: in lateinit, clk_disable_unused() is called, the enable_count of clock B being 0, clock B is gated and and critical clock A actually gets disabled. This situation was found while adding fdiv_clk gates to the meson8b platform. These clocks parent clk81 critical clock, which is the mother of all peripheral clocks in this system. Because of the issue described here, the system is crashing when clk_disable_unused() is called. The situation is solved by reverting commit f8f8f1d04494 ("clk: Don't touch hardware when reparenting during registration"). To avoid breaking again the situation described in this commit description, enabling critical clock should be done before walking the orphan list. This way, a parent critical clock may not be accidentally disabled due to the CLK_OPS_PARENT_ENABLE mechanism. Fixes: f8f8f1d04494 ("clk: Don't touch hardware when reparenting during registration") Cc: Stephen Boyd Cc: Shawn Guo Cc: Dong Aisheng Signed-off-by: Jerome Brunet Tested-by: Marek Szyprowski Tested-by: Heiko Stuebner Signed-off-by: Michael Turquette --- drivers/clk/clk.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0f686a9dac3e..6d54e933c901 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2967,23 +2967,38 @@ static int __clk_core_init(struct clk_core *core) rate = 0; core->rate = core->req_rate = rate; + /* + * Enable CLK_IS_CRITICAL clocks so newly added critical clocks + * don't get accidentally disabled when walking the orphan tree and + * reparenting clocks + */ + if (core->flags & CLK_IS_CRITICAL) { + unsigned long flags; + + clk_core_prepare(core); + + flags = clk_enable_lock(); + clk_core_enable(core); + clk_enable_unlock(flags); + } + /* * walk the list of orphan clocks and reparent any that newly finds a * parent. */ hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { struct clk_core *parent = __clk_init_parent(orphan); - unsigned long flags; /* - * we could call __clk_set_parent, but that would result in a - * redundant call to the .set_rate op, if it exists + * We need to use __clk_set_parent_before() and _after() to + * to properly migrate any prepare/enable count of the orphan + * clock. This is important for CLK_IS_CRITICAL clocks, which + * are enabled during init but might not have a parent yet. */ if (parent) { /* update the clk tree topology */ - flags = clk_enable_lock(); - clk_reparent(orphan, parent); - clk_enable_unlock(flags); + __clk_set_parent_before(orphan, parent); + __clk_set_parent_after(orphan, parent, NULL); __clk_recalc_accuracies(orphan); __clk_recalc_rates(orphan, 0); } @@ -3000,16 +3015,6 @@ static int __clk_core_init(struct clk_core *core) if (core->ops->init) core->ops->init(core->hw); - if (core->flags & CLK_IS_CRITICAL) { - unsigned long flags; - - clk_core_prepare(core); - - flags = clk_enable_lock(); - clk_core_enable(core); - clk_enable_unlock(flags); - } - kref_init(&core->ref); out: clk_pm_runtime_put(core); -- cgit v1.2.3 From e8cd7143e269ab58344ff10d9729da144e17fed2 Mon Sep 17 00:00:00 2001 From: Mustamin B Mustaffa Date: Tue, 27 Feb 2018 11:07:34 +0800 Subject: drm/i915: Enable VBT based BL control for DP Currently, BXT_PP is hardcoded with value '0'. It practically disabled eDP backlight on MRB (BXT) platform. This patch will tell which BXT_PP registers (there are two set of PP_CONTROL in the spec) to be used as defined in VBT (Video Bios Timing table) and this will enabled eDP backlight controller on MRB (BXT) platform. v2: - Remove unnecessary information in commit message. - Assign vbt.backlight.controller to a backlight_controller variable and return the variable value. v3: - Rebased to latest code base. - updated commit title. Signed-off-by: Mustamin B Mustaffa Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180227030734.37901-1-mustamin.b.mustaffa@intel.com (cherry picked from commit 73c0fcac97bf7f4a6a61b825b205d1cf127cfca7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 35c5299feab6..a29868cd30c7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -620,19 +620,15 @@ static int bxt_power_sequencer_idx(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + int backlight_controller = dev_priv->vbt.backlight.controller; lockdep_assert_held(&dev_priv->pps_mutex); /* We should never land here with regular DP ports */ WARN_ON(!intel_dp_is_edp(intel_dp)); - /* - * TODO: BXT has 2 PPS instances. The correct port->PPS instance - * mapping needs to be retrieved from VBT, for now just hard-code to - * use instance #0 always. - */ if (!intel_dp->pps_reset) - return 0; + return backlight_controller; intel_dp->pps_reset = false; @@ -642,7 +638,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) */ intel_dp_init_panel_power_sequencer_registers(intel_dp, false); - return 0; + return backlight_controller; } typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 82813ba9b4b31cd445a2ec1a1404de6e78f32b35 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 7 Mar 2018 17:13:03 +0000 Subject: drm/i915: Only prune fences after wait-for-all Currently, we only allow ourselves to prune the fences so long as all the waits completed (i.e. all the fences we checked were signaled), and that the reservation snapshot did not change across the wait. However, if we only waited for a subset of the reservation object, i.e. just waiting for the last writer to complete as opposed to all readers as well, then we would erroneously conclude we could prune the fences as indeed although all of our waits were successful, they did not represent the totality of the reservation object. v2: We only need to check the shared fences due to construction (i.e. all of the shared fences will be later than the exclusive fence, if any). Fixes: e54ca9774777 ("drm/i915: Remove completed fences after a wait") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180307171303.29466-1-chris@chris-wilson.co.uk (cherry picked from commit fa73055b8442c97b3ba7cd0aa57cd2ad32124201) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 66ee9d888d16..6ff5d655c202 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -434,20 +434,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, dma_fence_put(shared[i]); kfree(shared); + /* + * If both shared fences and an exclusive fence exist, + * then by construction the shared fences must be later + * than the exclusive fence. If we successfully wait for + * all the shared fences, we know that the exclusive fence + * must all be signaled. If all the shared fences are + * signaled, we can prune the array and recover the + * floating references on the fences/requests. + */ prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) { + if (excl && timeout >= 0) timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps_client); - prune_fences = timeout >= 0; - } dma_fence_put(excl); - /* Oportunistically prune the fences iff we know they have *all* been + /* + * Opportunistically prune the fences iff we know they have *all* been * signaled and that the reservation object has not been changed (i.e. * no new fences have been added). */ -- cgit v1.2.3 From f1430f145eefcab2bddb9e836d427c4aac067faa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Mar 2018 14:26:47 +0000 Subject: drm/i915: Kick the rps worker when changing the boost frequency The boost frequency is only applied from the RPS worker while someone is waiting on a request and requested a boost. As such, when the user wishes to change the frequency, we have to kick the worker in order to re-evaluate whether to apply the boost frequency. v2: Check num_waiters to decide if we should kick the worker to handle boosting. Fixes: 29ecd78d3b79 ("drm/i915: Define a separate variable and control for RPS waitboost frequency") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180308142648.4016-1-chris@chris-wilson.co.uk (cherry picked from commit 59cd31f177b34deb834a5c97478502741be1cf2e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_sysfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index b33d2158c234..e5e6f6bb2b05 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; + bool boost = false; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) @@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return -EINVAL; mutex_lock(&dev_priv->pcu_lock); - rps->boost_freq = val; + if (val != rps->boost_freq) { + rps->boost_freq = val; + boost = atomic_read(&rps->num_waiters); + } mutex_unlock(&dev_priv->pcu_lock); + if (boost) + schedule_work(&rps->work); return count; } -- cgit v1.2.3 From 3016e0a0c91246e55418825ba9aae271be267522 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:52 +0900 Subject: Revert "e1000e: Separate signaling for link check/link up" This reverts commit 19110cfbb34d4af0cdfe14cd243f3b09dc95b013. This reverts commit 4110e02eb45ea447ec6f5459c9934de0a273fb91. This reverts commit d3604515c9eda464a92e8e67aae82dfe07fe3c98. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed what happens to the link status when there is an error which happens after "get_link_status = false" in the copper check_for_link callbacks. Previously, such an error would be ignored and the link considered up. After that commit, any error implies that the link is down. Revert commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") and its followups. After reverting, the race condition described in the log of commit 19110cfbb34d is reintroduced. It may still be triggered by LSC events but this should keep the link down in case the link is electrically unstable, as discussed. The race may no longer be triggered by RXO events because commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") restored reading icr in the Other handler. Link: https://lkml.org/lkml/2018/3/1/789 Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/mac.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ff308b05d68c..d6d4ed7acf03 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,9 +1367,6 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1385,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1602,7 +1599,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -1616,12 +1613,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index db735644b312..b322011ec282 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,9 +410,6 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -426,7 +423,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -450,7 +447,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -464,12 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c0f23446bf26..dc853b0863af 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5090,7 +5090,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = ret_val > 0; + link_active = !hw->mac.get_link_status; } else { link_active = true; } -- cgit v1.2.3 From e2710dbf0dc1e37d85368e2404049dadda848d5a Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:53 +0900 Subject: e1000e: Fix link check race condition Alex reported the following race condition: /* link goes up... interrupt... schedule watchdog */ \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link \ e1000e_phy_has_link_generic(..., &link) link = true /* link goes down... interrupt */ \ e1000_msix_other hw->mac.get_link_status = true /* link is up */ mac->get_link_status = false link_active = true /* link_active is true, wrongly, and stays so because * get_link_status is false */ Avoid this problem by making sure that we don't set get_link_status = false after having checked the link. It seems this problem has been present since the introduction of e1000e. Link: https://lkml.org/lkml/2018/1/29/338 Reported-by: Alexander Duyck Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 31 ++++++++++++++++------------- drivers/net/ethernet/intel/e1000e/mac.c | 14 ++++++------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..1dddfb7b2de6 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1383,6 +1383,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1390,12 +1391,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) - return ret_val; + goto out; } /* When connected at 10Mbps half-duplex, some parts are excessively @@ -1428,7 +1429,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; @@ -1450,7 +1451,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type >= e1000_pch_spt) { u16 data; @@ -1459,14 +1460,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_rphy_locked(hw, PHY_REG(776, 20), &data); if (ret_val) { hw->phy.ops.release(hw); - return ret_val; + goto out; } ptr_gap = (data & (0x3FF << 2)) >> 2; @@ -1480,18 +1481,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_wphy_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } } @@ -1518,7 +1519,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) - return ret_val; + goto out; } if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for @@ -1526,7 +1527,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000_platform_pm_pch_lpt(hw, link); if (ret_val) - return ret_val; + goto out; } /* Clear link partner's EEE ability */ @@ -1549,9 +1550,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + goto out; switch (hw->mac.type) { case e1000_pch2lan: @@ -1617,6 +1616,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b322011ec282..5bdc3a2d4fd7 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -424,19 +424,15 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + if (ret_val || !link) + goto out; /* Check if there was DownShift, must be checked * immediately after link-up @@ -465,6 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } /** -- cgit v1.2.3 From 2f987a76a97773beafbc615b9c4d8fe79129a7f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Mar 2018 14:54:23 +0100 Subject: net: ipv6: keep sk status consistent after datagram connect failure On unsuccesful ip6_datagram_connect(), if the failure is caused by ip6_datagram_dst_update(), the sk peer information are cleared, but the sk->sk_state is preserved. If the socket was already in an established status, the overall sk status is inconsistent and fouls later checks in datagram code. Fix this saving the old peer information and restoring them in case of failure. This also aligns ipv6 datagram connect() behavior with ipv4. v1 -> v2: - added missing Fixes tag Fixes: 85cb73ff9b74 ("net: ipv6: reset daddr and dport in sk if connect() fails") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fbf08ce3f5ab..8a9ac2d0f5d3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr; + struct in6_addr *daddr, old_daddr; + __be32 fl6_flowlabel = 0; + __be32 old_fl6_flowlabel; + __be32 old_dport; int addr_type; int err; - __be32 fl6_flowlabel = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -238,9 +240,13 @@ ipv4_connected: } } + /* save the current peer information before updating it */ + old_daddr = sk->sk_v6_daddr; + old_fl6_flowlabel = np->flow_label; + old_dport = inet->inet_dport; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6_flowlabel; - inet->inet_dport = usin->sin6_port; /* @@ -250,11 +256,12 @@ ipv4_connected: err = ip6_datagram_dst_update(sk, true); if (err) { - /* Reset daddr and dport so that udp_v6_early_demux() - * fails to find this socket + /* Restore the socket peer info, to keep it consistent with + * the old socket state */ - memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); - inet->inet_dport = 0; + sk->sk_v6_daddr = old_daddr; + np->flow_label = old_fl6_flowlabel; + inet->inet_dport = old_dport; goto out; } -- cgit v1.2.3 From b954f94023dcc61388c8384f0f14eb8e42c863c5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Mar 2018 14:54:24 +0100 Subject: l2tp: fix races with ipv4-mapped ipv6 addresses The l2tp_tunnel_create() function checks for v4mapped ipv6 sockets and cache that flag, so that l2tp core code can reusing it at xmit time. If the socket is provided by the userspace, the connection status of the tunnel sockets can change between the tunnel creation and the xmit call, so that syzbot is able to trigger the following splat: BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:192 [inline] BUG: KASAN: use-after-free in ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264 Read of size 8 at addr ffff8801bd949318 by task syz-executor4/23448 CPU: 0 PID: 23448 Comm: syz-executor4 Not tainted 4.16.0-rc4+ #65 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23c/0x360 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ip6_dst_idev include/net/ip6_fib.h:192 [inline] ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264 inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139 l2tp_xmit_core net/l2tp/l2tp_core.c:1053 [inline] l2tp_xmit_skb+0x105f/0x1410 net/l2tp/l2tp_core.c:1148 pppol2tp_sendmsg+0x470/0x670 net/l2tp/l2tp_ppp.c:341 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 ___sys_sendmsg+0x767/0x8b0 net/socket.c:2046 __sys_sendmsg+0xe5/0x210 net/socket.c:2080 SYSC_sendmsg net/socket.c:2091 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2087 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x453e69 RSP: 002b:00007f819593cc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f819593d6d4 RCX: 0000000000453e69 RDX: 0000000000000081 RSI: 000000002037ffc8 RDI: 0000000000000004 RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000004c3 R14: 00000000006f72e8 R15: 0000000000000000 This change addresses the issues: * explicitly checking for TCP_ESTABLISHED for user space provided sockets * dropping the v4mapped flag usage - it can become outdated - and explicitly invoking ipv6_addr_v4mapped() instead The issue is apparently there since ancient times. v1 -> v2: (many thanks to Guillaume) - with csum issue introduced in v1 - replace pr_err with pr_debug - fix build issue with IPV6 disabled - move l2tp_sk_is_v4mapped in l2tp_core.c v2 -> v3: - don't update inet_daddr for v4mapped address, unneeded - drop rendundant check at creation time Reported-and-tested-by: syzbot+92fa328176eb07e4ac1a@syzkaller.appspotmail.com Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 38 ++++++++++++++++++-------------------- net/l2tp/l2tp_core.h | 3 --- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e22512e32827..14b67dfacc4b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -111,6 +111,13 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; +#if IS_ENABLED(CONFIG_IPV6) +static bool l2tp_sk_is_v6(struct sock *sk) +{ + return sk->sk_family == PF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr); +} +#endif static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) { @@ -1049,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif @@ -1112,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } + /* The user-space may change the connection status for the user-space + * provided socket at run time: we must check it under the socket lock + */ + if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) { + kfree_skb(skb); + ret = NET_XMIT_DROP; + goto out_unlock; + } + /* Get routing info from the tunnel socket */ skb_dst_drop(skb); skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); @@ -1131,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(sk)) udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, &sk->sk_v6_daddr, udp_len); @@ -1511,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - if (ipv6_addr_v4mapped(&np->saddr) && - ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { - struct inet_sock *inet = inet_sk(sk); - - tunnel->v4mapped = true; - inet->inet_saddr = np->saddr.s6_addr32[3]; - inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; - inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; - } else { - tunnel->v4mapped = false; - } - } -#endif - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a1aa9550f04e..2718d0b284d0 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -188,9 +188,6 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ -#if IS_ENABLED(CONFIG_IPV6) - bool v4mapped; -#endif struct work_struct del_work; -- cgit v1.2.3 From 04bf9ab3359ff89059509ee5c35446c45b9cdaaa Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:35 +0100 Subject: clk: fix determine rate error with pass-through clock If we try to determine the rate of a pass-through clock (a clock which does not implement .round_rate() nor .determine_rate()), clk_core_round_rate_nolock() will directly forward the call to the parent clock. In the particular case where the pass-through actually does not have a parent, clk_core_round_rate_nolock() will directly return 0 with the requested rate still set to the initial request structure. This is interpreted as if the rate could be exactly achieved while it actually cannot be adjusted. This become a real problem when this particular pass-through clock is the parent of a mux with the flag CLK_SET_RATE_PARENT set. The pass-through clock will always report an exact match, get picked and finally error when the rate is actually getting set. This is fixed by setting the rate inside the req to 0 when core is NULL in clk_core_round_rate_nolock() (same as in __clk_determine_rate() when hw is NULL) Fixes: 0f6cc2b8e94d ("clk: rework calls to round and determine rate callbacks") Signed-off-by: Jerome Brunet Signed-off-by: Michael Turquette Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6d54e933c901..cca05ea2c058 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1125,8 +1125,10 @@ static int clk_core_round_rate_nolock(struct clk_core *core, { lockdep_assert_held(&prepare_lock); - if (!core) + if (!core) { + req->rate = 0; return 0; + } clk_core_init_rate_req(core, req); -- cgit v1.2.3 From 9903e41ae1f5d50c93f268ca3304d4d7c64b9311 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Jan 2018 06:36:34 +0000 Subject: clk: hisilicon: hi3660:Fix potential NULL dereference in hi3660_stub_clk_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit platform_get_resource() may return NULL, add proper check to avoid potential NULL dereferencing. This is detected by Coccinelle semantic patch. @@ expression pdev, res, n, t, e, e1, e2; @@ res = platform_get_resource(pdev, t, n); + if (!res) + return -EINVAL; ... when != res == NULL e = devm_ioremap(e1, res->start, e2); Fixes: 4f16f7ff3bc0 ("clk: hisilicon: Add support for Hi3660 stub clocks") Signed-off-by: Wei Yongjun Signed-off-by: Stephen Boyd --- drivers/clk/hisilicon/clk-hi3660-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/hisilicon/clk-hi3660-stub.c b/drivers/clk/hisilicon/clk-hi3660-stub.c index 9b6c72bbddf9..e8b2c43b1bb8 100644 --- a/drivers/clk/hisilicon/clk-hi3660-stub.c +++ b/drivers/clk/hisilicon/clk-hi3660-stub.c @@ -149,6 +149,8 @@ static int hi3660_stub_clk_probe(struct platform_device *pdev) return PTR_ERR(stub_clk_chan.mbox); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; freq_reg = devm_ioremap(dev, res->start, resource_size(res)); if (!freq_reg) return -ENOMEM; -- cgit v1.2.3 From 55c19eee3b471e7ca7e38783836f7b7137c9d14f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Jan 2018 06:34:43 +0000 Subject: clk: qcom: msm8916: Fix return value check in qcom_apcs_msm8916_clk_probe() In case of error, the function dev_get_regmap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: 81ac38847a1d ("clk: qcom: Add APCS clock controller support") Signed-off-by: Wei Yongjun Signed-off-by: Stephen Boyd --- drivers/clk/qcom/apcs-msm8916.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/apcs-msm8916.c b/drivers/clk/qcom/apcs-msm8916.c index 246957f1a413..b1cc8dbcd327 100644 --- a/drivers/clk/qcom/apcs-msm8916.c +++ b/drivers/clk/qcom/apcs-msm8916.c @@ -49,11 +49,10 @@ static int qcom_apcs_msm8916_clk_probe(struct platform_device *pdev) struct clk_regmap_mux_div *a53cc; struct regmap *regmap; struct clk_init_data init = { }; - int ret; + int ret = -ENODEV; regmap = dev_get_regmap(parent, NULL); - if (IS_ERR(regmap)) { - ret = PTR_ERR(regmap); + if (!regmap) { dev_err(dev, "failed to get regmap: %d\n", ret); return ret; } -- cgit v1.2.3 From 5a9f8df68ee6927f21dd3f2c75c16feb8b53a9e8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 12 Mar 2018 16:00:40 -0700 Subject: net: dsa: Fix dsa_is_user_port() test inversion During the conversion to dsa_is_user_port(), a condition ended up being reversed, which would prevent the creation of any user port when using the legacy binding and/or platform data, fix that. Fixes: 4a5b85ffe2a0 ("net: dsa: use dsa_is_user_port everywhere") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index cb54b81d0bd9..42a7b85b84e1 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, ds->ports[i].dn = cd->port_dn[i]; ds->ports[i].cpu_dp = dst->cpu_dp; - if (dsa_is_user_port(ds, i)) + if (!dsa_is_user_port(ds, i)) continue; ret = dsa_slave_create(&ds->ports[i]); -- cgit v1.2.3 From 318aaf34f1179b39fa9c30fa0f3288b645beee39 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 8 Mar 2018 10:34:53 +0800 Subject: scsi: libsas: defer ata device eh commands to libata When ata device doing EH, some commands still attached with tasks are not passed to libata when abort failed or recover failed, so libata did not handle these commands. After these commands done, sas task is freed, but ata qc is not freed. This will cause ata qc leak and trigger a warning like below: WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037 ata_eh_finish+0xb4/0xcc CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G W OE 4.14.0#1 ...... Call trace: [] ata_eh_finish+0xb4/0xcc [] ata_do_eh+0xc4/0xd8 [] ata_std_error_handler+0x44/0x8c [] ata_scsi_port_error_handler+0x480/0x694 [] async_sas_ata_eh+0x4c/0x80 [] async_run_entry_fn+0x4c/0x170 [] process_one_work+0x144/0x390 [] worker_thread+0x144/0x418 [] kthread+0x10c/0x138 [] ret_from_fork+0x10/0x18 If ata qc leaked too many, ata tag allocation will fail and io blocked for ever. As suggested by Dan Williams, defer ata device commands to libata and merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle ata qcs correctly after this. Signed-off-by: Jason Yan CC: Xiaofei Tan CC: John Garry CC: Dan Williams Reviewed-by: Dan Williams Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_scsi_host.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 626727207889..a372af68d9a9 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -223,6 +223,7 @@ out_done: static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) { struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_task *task = TO_SAS_TASK(cmd); /* At this point, we only get called following an actual abort @@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) */ sas_end_task(cmd, task); + if (dev_is_sata(dev)) { + /* defer commands to libata so that libata EH can + * handle ata qcs correctly + */ + list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q); + return; + } + /* now finish the command and move it on to the error * handler done list, this also takes it off the * error handler pending list. @@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); } -static void sas_eh_defer_cmd(struct scsi_cmnd *cmd) -{ - struct domain_device *dev = cmd_to_domain_dev(cmd); - struct sas_ha_struct *ha = dev->port->ha; - struct sas_task *task = TO_SAS_TASK(cmd); - - if (!dev_is_sata(dev)) { - sas_eh_finish_cmd(cmd); - return; - } - - /* report the timeout to libata */ - sas_end_task(cmd, task); - list_move_tail(&cmd->eh_entry, &ha->eh_ata_q); -} - static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd) { struct scsi_cmnd *cmd, *n; @@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd list_for_each_entry_safe(cmd, n, error_q, eh_entry) { if (cmd->device->sdev_target == my_cmd->device->sdev_target && cmd->device->lun == my_cmd->device->lun) - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); } } @@ -618,12 +611,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * case TASK_IS_DONE: SAS_DPRINTK("%s: task 0x%p is done\n", __func__, task); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); continue; case TASK_IS_ABORTED: SAS_DPRINTK("%s: task 0x%p is aborted\n", __func__, task); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); continue; case TASK_IS_AT_LU: SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); @@ -634,7 +627,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * "recovered\n", SAS_ADDR(task->dev), cmd->device->lun); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); sas_scsi_clear_queue_lu(work_q, cmd); goto Again; } -- cgit v1.2.3 From 14bc1dff74277408f08661d03e785710a46e0699 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 7 Mar 2018 10:49:26 -0800 Subject: scsi: qla2xxx: Remove FC_NO_LOOP_ID for FCP and FC-NVMe Discovery Commit 7d64c39e64310 fixed regression of FCP discovery when Nport Handle is in-use and relogin is triggered. However, during FCP and FC-NVMe discovery this resulted into only discovering NVMe LUNs. This patch fixes issue where FCP and FC-NVMe protocol is used on same port where assigning FC_NO_LOOP_ID will result into discovery failure for FCP LUNs. Fixes: a084fd68e1d26 ("scsi: qla2xxx: Fix re-login for Nport Handle in use") Signed-off-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 00329dda6179..8d7fab3cd01d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1719,7 +1719,6 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); - ea->fcport->loop_id = FC_NO_LOOP_ID; ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset; ea->fcport->logout_on_delete = 1; ea->fcport->send_els_logo = 0; -- cgit v1.2.3 From 0dcd7876029b58770f769cbb7b484e88e4a305e5 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 7 Mar 2018 14:42:53 -0800 Subject: net: xfrm: use preempt-safe this_cpu_read() in ipcomp_alloc_tfms() f7c83bcbfaf5 ("net: xfrm: use __this_cpu_read per-cpu helper") added a __this_cpu_read() call inside ipcomp_alloc_tfms(). At the time, __this_cpu_read() required the caller to either not care about races or to handle preemption/interrupt issues. 3.15 tightened the rules around some per-cpu operations, and now __this_cpu_read() should never be used in a preemptible context. On 3.15 and later, we need to use this_cpu_read() instead. syzkaller reported this leading to the following kernel BUG while fuzzing sendmsg: BUG: using __this_cpu_read() in preemptible [00000000] code: repro/3101 caller is ipcomp_init_state+0x185/0x990 CPU: 3 PID: 3101 Comm: repro Not tainted 4.16.0-rc4-00123-g86f84779d8e9 #154 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0xb9/0x115 check_preemption_disabled+0x1cb/0x1f0 ipcomp_init_state+0x185/0x990 ? __xfrm_init_state+0x876/0xc20 ? lock_downgrade+0x5e0/0x5e0 ipcomp4_init_state+0xaa/0x7c0 __xfrm_init_state+0x3eb/0xc20 xfrm_init_state+0x19/0x60 pfkey_add+0x20df/0x36f0 ? pfkey_broadcast+0x3dd/0x600 ? pfkey_sock_destruct+0x340/0x340 ? pfkey_seq_stop+0x80/0x80 ? __skb_clone+0x236/0x750 ? kmem_cache_alloc+0x1f6/0x260 ? pfkey_sock_destruct+0x340/0x340 ? pfkey_process+0x62a/0x6f0 pfkey_process+0x62a/0x6f0 ? pfkey_send_new_mapping+0x11c0/0x11c0 ? mutex_lock_io_nested+0x1390/0x1390 pfkey_sendmsg+0x383/0x750 ? dump_sp+0x430/0x430 sock_sendmsg+0xc0/0x100 ___sys_sendmsg+0x6c8/0x8b0 ? copy_msghdr_from_user+0x3b0/0x3b0 ? pagevec_lru_move_fn+0x144/0x1f0 ? find_held_lock+0x32/0x1c0 ? do_huge_pmd_anonymous_page+0xc43/0x11e0 ? lock_downgrade+0x5e0/0x5e0 ? get_kernel_page+0xb0/0xb0 ? _raw_spin_unlock+0x29/0x40 ? do_huge_pmd_anonymous_page+0x400/0x11e0 ? __handle_mm_fault+0x553/0x2460 ? __fget_light+0x163/0x1f0 ? __sys_sendmsg+0xc7/0x170 __sys_sendmsg+0xc7/0x170 ? SyS_shutdown+0x1a0/0x1a0 ? __do_page_fault+0x5a0/0xca0 ? lock_downgrade+0x5e0/0x5e0 SyS_sendmsg+0x27/0x40 ? __sys_sendmsg+0x170/0x170 do_syscall_64+0x19f/0x640 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f0ee73dfb79 RSP: 002b:00007ffe14fc15a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0ee73dfb79 RDX: 0000000000000000 RSI: 00000000208befc8 RDI: 0000000000000004 RBP: 00007ffe14fc15b0 R08: 00007ffe14fc15c0 R09: 00007ffe14fc15c0 R10: 0000000000000000 R11: 0000000000000207 R12: 0000000000400440 R13: 00007ffe14fc16b0 R14: 0000000000000000 R15: 0000000000000000 Signed-off-by: Greg Hackmann Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index ccfdc7115a83..a00ec715aa46 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) struct crypto_comp *tfm; /* This can be any valid CPU ID so we don't need locking. */ - tfm = __this_cpu_read(*pos->tfms); + tfm = this_cpu_read(*pos->tfms); if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; -- cgit v1.2.3 From 79832f0b5f718e0023d9dd73e6845310609a564d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Mar 2018 14:09:21 +0000 Subject: efi/libstub/tpm: Initialize pointer variables to zero for mixed mode As reported by Jeremy Cline, running the new TPM libstub code in mixed mode (i.e., 64-bit kernel on 32-bit UEFI) results in hangs when invoking the TCG2 protocol, or when accessing the log_tbl pool allocation. The reason turns out to be that in both cases, the 64-bit pointer variables are not fully initialized by the 32-bit EFI code, and so we should take care to zero initialize these variables beforehand, or we'll end up dereferencing bogus pointers. Reported-by: Jeremy Cline Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hdegoede@redhat.com Cc: jarkko.sakkinen@linux.intel.com Cc: javierm@redhat.com Cc: linux-efi@vger.kernel.org Cc: tweek@google.com Link: http://lkml.kernel.org/r/20180313140922.17266-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/tpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index da661bf8cb96..13c1edd37e96 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -68,11 +68,11 @@ void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg) efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID; efi_status_t status; efi_physical_addr_t log_location, log_last_entry; - struct linux_efi_tpm_eventlog *log_tbl; + struct linux_efi_tpm_eventlog *log_tbl = NULL; unsigned long first_entry_addr, last_entry_addr; size_t log_size, last_entry_size; efi_bool_t truncated; - void *tcg2_protocol; + void *tcg2_protocol = NULL; status = efi_call_early(locate_protocol, &tcg2_guid, NULL, &tcg2_protocol); -- cgit v1.2.3 From f89782c2d131e6eae0d1ea2569ba76bc4c5875fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Mar 2018 12:09:38 +0300 Subject: qed: Use after free in qed_rdma_free() We're dereferencing "p_hwfn->p_rdma_info" but that is freed on the line before in qed_rdma_resc_free(p_hwfn). Fixes: 9de506a547c0 ("qed: Free RoCE ILT Memory on rmmod qedr") Signed-off-by: Dan Carpenter Acked-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index f3ee6538b553..a411f9c702a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -379,8 +379,8 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn) DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); qed_rdma_free_reserved_lkey(p_hwfn); - qed_rdma_resc_free(p_hwfn); qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto); + qed_rdma_resc_free(p_hwfn); } static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) -- cgit v1.2.3 From 94e46a4f2d5eb14059e42f313c098d4854847376 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Tue, 13 Mar 2018 09:48:40 -0500 Subject: usb: musb: Fix external abort in musb_remove on omap2430 This fixes an oops on unbind / module unload (on the musb omap2430 platform). musb_remove function now calls musb_platform_exit before disabling runtime pm. Signed-off-by: Merlijn Wajer Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c344ef4e5355..4d723077be2b 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2473,11 +2473,11 @@ static int musb_remove(struct platform_device *pdev) musb_disable_interrupts(musb); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); spin_unlock_irqrestore(&musb->lock, flags); + musb_platform_exit(musb); pm_runtime_dont_use_autosuspend(musb->controller); pm_runtime_put_sync(musb->controller); pm_runtime_disable(musb->controller); - musb_platform_exit(musb); musb_phy_callback = NULL; if (musb->dma_controller) musb_dma_controller_destroy(musb->dma_controller); -- cgit v1.2.3 From 5617c599bbb09bfbee370e1cdb479d08bcd07559 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 19 Feb 2018 16:02:48 +0100 Subject: auxdisplay: panel: Change comments to silence fallthrough warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling with W=1 with gcc 7.2.0 gives 3 warnings like: drivers/auxdisplay/panel.c: In function ‘panel_process_inputs’: drivers/auxdisplay/panel.c:1374:17: warning: this statement may fall through [-Wimplicit-fallthrough=] Cc: Willy Tarreau Cc: Geert Uytterhoeven Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/panel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index ea7869c0d7f9..ec5e8800f8ad 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -1372,7 +1372,7 @@ static void panel_process_inputs(void) break; input->rise_timer = 0; input->state = INPUT_ST_RISING; - /* no break here, fall through */ + /* fall through */ case INPUT_ST_RISING: if ((phys_curr & input->mask) != input->value) { input->state = INPUT_ST_LOW; @@ -1385,11 +1385,11 @@ static void panel_process_inputs(void) } input->high_timer = 0; input->state = INPUT_ST_HIGH; - /* no break here, fall through */ + /* fall through */ case INPUT_ST_HIGH: if (input_state_high(input)) break; - /* no break here, fall through */ + /* fall through */ case INPUT_ST_FALLING: input_state_falling(input); } -- cgit v1.2.3 From 6a78b4dde1057f89a1e25ec81ed646c4f8077311 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 19 Feb 2018 16:14:17 +0100 Subject: auxdisplay: img-ascii-lcd: Fix doc comment to silence warnings Compiling with W=1 with gcc 7.2.0 gives 2 warnings: drivers/auxdisplay/img-ascii-lcd.c:233: warning: Function parameter or member 't' not described in 'img_ascii_lcd_scroll' drivers/auxdisplay/img-ascii-lcd.c:233: warning: Excess function parameter 'arg' description in 'img_ascii_lcd_scroll' Cc: Paul Burton Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/img-ascii-lcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index 9180b9bd5821..d8133024fd5d 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -224,7 +224,7 @@ MODULE_DEVICE_TABLE(of, img_ascii_lcd_matches); /** * img_ascii_lcd_scroll() - scroll the display by a character - * @arg: really a pointer to the private data structure + * @t: really a pointer to the private data structure * * Scroll the current message along the LCD by one character, rearming the * timer if required. -- cgit v1.2.3 From 26a2c54d03bd514fb3d3520706f911b3ca56b011 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 19 Feb 2018 16:47:52 +0100 Subject: auxdisplay: img-ascii-lcd: Silence 2 uninitialized warnings The warnings are: drivers/auxdisplay/img-ascii-lcd.c: warning: 'err' may be used uninitialized in this function [-Wuninitialized] At lines 109 and 207. Reported by Geert using the build service several times, e.g.: https://lkml.org/lkml/2018/2/19/303 They are two false positives, since num_chars > 0 in the three present configurations (boston, malta, sead3). Initialize to 0 in order to silence the warning. Cc: Geert Uytterhoeven Cc: Paul Burton Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/img-ascii-lcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index d8133024fd5d..834509506ef6 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -97,7 +97,7 @@ static struct img_ascii_lcd_config boston_config = { static void malta_update(struct img_ascii_lcd_ctx *ctx) { unsigned int i; - int err; + int err = 0; for (i = 0; i < ctx->cfg->num_chars; i++) { err = regmap_write(ctx->regmap, @@ -180,7 +180,7 @@ static int sead3_wait_lcd_idle(struct img_ascii_lcd_ctx *ctx) static void sead3_update(struct img_ascii_lcd_ctx *ctx) { unsigned int i; - int err; + int err = 0; for (i = 0; i < ctx->cfg->num_chars; i++) { err = sead3_wait_lcd_idle(ctx); -- cgit v1.2.3 From c37366742baa2bb3225be507d283baef151c5f8a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 12 Mar 2018 20:30:43 -0400 Subject: dm mpath: fix uninitialized 'pg_init_wait' waitqueue_head NULL pointer Initialize all the scsi_dh related 'struct multipath' members regardless of whether a scsi_dh is in use or not. The subtle (and fragile) SCSI-assuming legacy code clearly needs further decoupling from non-SCSI (and/or developer understanding). Fixes: 8d47e65948dd ("dm mpath: remove unnecessary NVMe branching in favor of scsi_dh checks") Reported-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3fde9e9faddd..87c404eebe9f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -223,6 +223,16 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) dm_table_set_type(ti->table, m->queue_mode); + /* + * Init fields that are only used when a scsi_dh is attached + * - must do this unconditionally (really doesn't hurt non-SCSI uses) + */ + set_bit(MPATHF_QUEUE_IO, &m->flags); + atomic_set(&m->pg_init_in_progress, 0); + atomic_set(&m->pg_init_count, 0); + m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; + init_waitqueue_head(&m->pg_init_wait); + return 0; } @@ -331,7 +341,6 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg) set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); set_bit(MPATHF_QUEUE_IO, &m->flags); } else { - /* FIXME: not needed if no scsi_dh is attached */ clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); clear_bit(MPATHF_QUEUE_IO, &m->flags); } @@ -823,16 +832,6 @@ retain: */ kfree(m->hw_handler_name); m->hw_handler_name = attached_handler_name; - - /* - * Init fields that are only used when a scsi_dh is attached - */ - if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) { - atomic_set(&m->pg_init_in_progress, 0); - atomic_set(&m->pg_init_count, 0); - m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; - init_waitqueue_head(&m->pg_init_wait); - } } } -- cgit v1.2.3 From e8f74a0f00113d74ac18d6de13096f9e2f95618a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 12 Mar 2018 19:49:25 -0400 Subject: dm mpath: eliminate need to use scsi_device_from_queue Instead of scsi_device_from_queue(), use scsi_dh_attached_handler_name() -- whose implementation uses scsi_device_from_queue() to avoid trying to access SCSI-specific resources from non-SCSI devices. Fixes buildbot reported issue when CONFIG_SCSI isn't set: ERROR: "scsi_device_from_queue" [drivers/md/dm-multipath.ko] undefined! Fixes: 8d47e65948dd ("dm mpath: remove unnecessary NVMe branching in favor of scsi_dh checks") Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 87c404eebe9f..2481c4794ed6 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -805,15 +804,14 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, return 0; } -static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error) +static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, + const char *attached_handler_name, char **error) { struct request_queue *q = bdev_get_queue(bdev); - const char *attached_handler_name; int r; if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { retain: - attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name) { /* * Clear any hw_handler_params associated with a @@ -867,7 +865,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; - struct scsi_device *sdev; + struct request_queue *q; + const char *attached_handler_name; /* we need at least a path arg */ if (as->argc < 1) { @@ -886,11 +885,11 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } - sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev)); - if (sdev) { - put_device(&sdev->sdev_gendev); + q = bdev_get_queue(p->path.dev->bdev); + attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); + if (attached_handler_name) { INIT_DELAYED_WORK(&p->activate_path, activate_path_work); - r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error); + r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error); if (r) { dm_put_device(ti, p->path.dev); goto bad; -- cgit v1.2.3 From 537f4146c53c95aac977852b371bafb9c6755ee1 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 6 Mar 2018 15:35:43 +0530 Subject: workqueue: use put_device() instead of kfree() Never directly free @dev after calling device_register(), even if it returned an error! Always use put_device() to give up the reference initialized in this function instead. Signed-off-by: Arvind Yadav Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bb9a519cbf50..ccd1080dd6e7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5337,7 +5337,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) ret = device_register(&wq_dev->dev); if (ret) { - kfree(wq_dev); + put_device(&wq_dev->dev); wq->wq_dev = NULL; return ret; } -- cgit v1.2.3 From 2c292dbb398ee46fc1343daf6c3cf9715a75688e Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Thu, 8 Mar 2018 15:51:40 +0200 Subject: IB/mlx5: Fix out-of-bounds read in create_raw_packet_qp_rq Add a check for the length of the qpin structure to prevent out-of-bounds reads BUG: KASAN: slab-out-of-bounds in create_raw_packet_qp+0x114c/0x15e2 Read of size 8192 at addr ffff880066b99290 by task syz-executor3/549 CPU: 3 PID: 549 Comm: syz-executor3 Not tainted 4.15.0-rc2+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0x8d/0xd4 print_address_description+0x73/0x290 kasan_report+0x25c/0x370 ? create_raw_packet_qp+0x114c/0x15e2 memcpy+0x1f/0x50 create_raw_packet_qp+0x114c/0x15e2 ? create_raw_packet_qp_tis.isra.28+0x13d/0x13d ? lock_acquire+0x370/0x370 create_qp_common+0x2245/0x3b50 ? destroy_qp_user.isra.47+0x100/0x100 ? kasan_kmalloc+0x13d/0x170 ? sched_clock_cpu+0x18/0x180 ? fs_reclaim_acquire.part.15+0x5/0x30 ? __lock_acquire+0xa11/0x1da0 ? sched_clock_cpu+0x18/0x180 ? kmem_cache_alloc_trace+0x17e/0x310 ? mlx5_ib_create_qp+0x30e/0x17b0 mlx5_ib_create_qp+0x33d/0x17b0 ? sched_clock_cpu+0x18/0x180 ? create_qp_common+0x3b50/0x3b50 ? lock_acquire+0x370/0x370 ? __radix_tree_lookup+0x180/0x220 ? uverbs_try_lock_object+0x68/0xc0 ? rdma_lookup_get_uobject+0x114/0x240 create_qp.isra.5+0xce4/0x1e20 ? ib_uverbs_ex_create_cq_cb+0xa0/0xa0 ? copy_ah_attr_from_uverbs.isra.2+0xa00/0xa00 ? ib_uverbs_cq_event_handler+0x160/0x160 ? __might_fault+0x17c/0x1c0 ib_uverbs_create_qp+0x21b/0x2a0 ? ib_uverbs_destroy_cq+0x2e0/0x2e0 ib_uverbs_write+0x55a/0xad0 ? ib_uverbs_destroy_cq+0x2e0/0x2e0 ? ib_uverbs_destroy_cq+0x2e0/0x2e0 ? ib_uverbs_open+0x760/0x760 ? futex_wake+0x147/0x410 ? check_prev_add+0x1680/0x1680 ? do_futex+0x3d3/0xa60 ? sched_clock_cpu+0x18/0x180 __vfs_write+0xf7/0x5c0 ? ib_uverbs_open+0x760/0x760 ? kernel_read+0x110/0x110 ? lock_acquire+0x370/0x370 ? __fget+0x264/0x3b0 vfs_write+0x18a/0x460 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x4477b9 RSP: 002b:00007f1822cadc18 EFLAGS: 00000292 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004477b9 RDX: 0000000000000070 RSI: 000000002000a000 RDI: 0000000000000005 RBP: 0000000000708000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 00000000ffffffff R13: 0000000000005d70 R14: 00000000006e6e30 R15: 0000000020010ff0 Allocated by task 549: __kmalloc+0x15e/0x340 kvmalloc_node+0xa1/0xd0 create_user_qp.isra.46+0xd42/0x1610 create_qp_common+0x2e63/0x3b50 mlx5_ib_create_qp+0x33d/0x17b0 create_qp.isra.5+0xce4/0x1e20 ib_uverbs_create_qp+0x21b/0x2a0 ib_uverbs_write+0x55a/0xad0 __vfs_write+0xf7/0x5c0 vfs_write+0x18a/0x460 SyS_write+0xc7/0x1a0 entry_SYSCALL_64_fastpath+0x18/0x85 Freed by task 368: kfree+0xeb/0x2f0 kernfs_fop_release+0x140/0x180 __fput+0x266/0x700 task_work_run+0x104/0x180 exit_to_usermode_loop+0xf7/0x110 syscall_return_slowpath+0x298/0x370 entry_SYSCALL_64_fastpath+0x83/0x85 The buggy address belongs to the object at ffff880066b99180 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 272 bytes inside of 512-byte region [ffff880066b99180, ffff880066b99380) The buggy address belongs to the page: page:000000006040eedd count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 0000000000000000 0000000000000000 0000000180190019 raw: ffffea00019a7500 0000000b0000000b ffff88006c403080 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880066b99180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880066b99200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff880066b99280: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff880066b99300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880066b99380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Cc: syzkaller Fixes: 0fb2ed66a14c ("IB/mlx5: Add create and destroy functionality for Raw Packet QP") Signed-off-by: Boris Pismenny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 36197fbac63a..a2e1aa86e133 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1161,7 +1161,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static int get_rq_pas_size(void *qpc) +static size_t get_rq_pas_size(void *qpc) { u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); @@ -1177,7 +1177,8 @@ static int get_rq_pas_size(void *qpc) } static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, void *qpin) + struct mlx5_ib_rq *rq, void *qpin, + size_t qpinlen) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1186,9 +1187,12 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - int inlen; + size_t rq_pas_size = get_rq_pas_size(qpc); + size_t inlen; int err; - u32 rq_pas_size = get_rq_pas_size(qpc); + + if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + return -EINVAL; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; in = kvzalloc(inlen, GFP_KERNEL); @@ -1277,7 +1281,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, + u32 *in, size_t inlen, struct ib_pd *pd) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -1309,7 +1313,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in); + err = create_raw_packet_qp_rq(dev, rq, in, inlen); if (err) goto err_destroy_sq; @@ -1872,11 +1876,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } } + if (inlen < 0) { + err = -EINVAL; + goto err; + } + if (init_attr->qp_type == IB_QPT_RAW_PACKET || qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); - err = create_raw_packet_qp(dev, qp, in, pd); + err = create_raw_packet_qp(dev, qp, in, inlen, pd); } else { err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); } -- cgit v1.2.3 From c2b37f76485f073f020e60b5954b6dc4e55f693c Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Thu, 8 Mar 2018 15:51:41 +0200 Subject: IB/mlx5: Fix integer overflows in mlx5_ib_create_srq This patch validates user provided input to prevent integer overflow due to integer manipulation in the mlx5_ib_create_srq function. Cc: syzkaller Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Boris Pismenny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/srq.c | 15 +++++++++------ include/linux/mlx5/driver.h | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 6d5fadad9090..3c7522d025f2 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -241,8 +241,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_srq *srq; - int desc_size; - int buf_size; + size_t desc_size; + size_t buf_size; int err; struct mlx5_srq_attr in = {0}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); @@ -266,15 +266,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); + if (desc_size == 0 || srq->msrq.max_gs > desc_size) + return ERR_PTR(-EINVAL); desc_size = roundup_pow_of_two(desc_size); - desc_size = max_t(int, 32, desc_size); + desc_size = max_t(size_t, 32, desc_size); + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) + return ERR_PTR(-EINVAL); srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", - desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, - srq->msrq.max_avail_gather); + if (buf_size < desc_size) + return ERR_PTR(-EINVAL); in.type = init_attr->srq_type; if (pd->uobject) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ed79a8a8318..9d3a03364e6e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -453,8 +453,8 @@ struct mlx5_core_srq { struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; - int max_gs; - int max_avail_gather; + size_t max_gs; + size_t max_avail_gather; int wqe_shift; void (*event) (struct mlx5_core_srq *, enum mlx5_event); -- cgit v1.2.3 From 6417250d3f894e66a68ba1cd93676143f2376a6f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Mar 2018 19:34:42 -0800 Subject: workqueue: remove unused cancel_work() Found this by accident. There are no usages of bare cancel_work() in current kernel source. Signed-off-by: Stephen Hemminger Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 - kernel/workqueue.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bc0cda180c8b..0c3301421c57 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -456,7 +456,6 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); -extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ccd1080dd6e7..6ec6ba65127b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3018,14 +3018,6 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) return ret; } -/* - * See cancel_delayed_work() - */ -bool cancel_work(struct work_struct *work) -{ - return __cancel_work(work, false); -} - /** * cancel_delayed_work - cancel a delayed work * @dwork: delayed_work to cancel -- cgit v1.2.3 From a8b48a4dccea77e29462e59f1dbf0d5aa1ff167c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 7 Mar 2018 22:17:20 +1100 Subject: KVM: PPC: Book3S HV: Fix trap number return from __kvmppc_vcore_entry This fixes a bug where the trap number that is returned by __kvmppc_vcore_entry gets corrupted. The effect of the corruption is that IPIs get ignored on POWER9 systems when the IPI is sent via a doorbell interrupt to a CPU which is executing in a KVM guest. The effect of the IPI being ignored is often that another CPU locks up inside smp_call_function_many() (and if that CPU is holding a spinlock, other CPUs then lock up inside raw_spin_lock()). The trap number is currently held in register r12 for most of the assembly-language part of the guest exit path. In that path, we call kvmppc_subcore_exit_guest(), which is a C function, without restoring r12 afterwards. Depending on the kernel config and the compiler, it may modify r12 or it may not, so some config/compiler combinations see the bug and others don't. To fix this, we arrange for the trap number to be stored on the stack from the 'guest_bypass:' label until the end of the function, then the trap number is loaded and returned in r12 as before. Cc: stable@vger.kernel.org # v4.8+ Fixes: fd7bacbca47a ("KVM: PPC: Book3S HV: Fix TB corruption in guest exit path on HMI interrupt") Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f31f357b8c5a..d33264697a31 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -320,7 +320,6 @@ kvm_novcpu_exit: stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) secondary_too_late: li r12, 0 + stw r12, STACK_SLOT_TRAP(r1) cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) @@ -1558,12 +1558,12 @@ mc_cont: 3: stw r5,VCPU_SLB_MAX(r9) guest_bypass: + stw r12, STACK_SLOT_TRAP(r1) mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop ld r9, HSTATE_KVM_VCPU(r13) - lwz r12, VCPU_TRAP(r9) /* Stop others sending VCPU interrupts to this physical CPU */ li r0, -1 @@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. + * Here STACK_SLOT_TRAP(r1) contains the trap number. */ kvmhv_switch_to_host: /* Secondary threads wait for primary to do partition switch */ @@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* If HMI, call kvmppc_realmode_hmi_handler() */ + lwz r12, STACK_SLOT_TRAP(r1) cmpwi r12, BOOK3S_INTERRUPT_HMI bne 27f bl kvmppc_realmode_hmi_handler nop cmpdi r3, 0 - li r12, BOOK3S_INTERRUPT_HMI /* * At this point kvmppc_realmode_hmi_handler may have resync-ed * the TB, and if it has, we must not subtract the guest timebase @@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION lwz r8, KVM_SPLIT_DO_RESTORE(r3) cmpwi r8, 0 beq 47f - stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_p9_restore_lpcr nop - lwz r12, STACK_SLOT_TRAP(r1) b 48f 47: END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) + lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */ ld r0, SFS+PPC_LR_STKOFF(r1) addi r1, r1, SFS mtlr r0 -- cgit v1.2.3 From 0cbfeef230571b132d2ac2ea4346b200b311258f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Feb 2018 14:08:52 +0000 Subject: libnvdimm: remove redundant assignment to pointer 'dev' Pointer dev is being assigned a value that is never read, it is being re-assigned the same value later on, hence the initialization is redundant and can be removed. Cleans up clang warning: drivers/nvdimm/pfn_devs.c:307:17: warning: Value stored to 'dev' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f5c4e8c6e29d..2f4d18752c97 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -304,7 +304,7 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = { struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { - struct device *dev = &nd_pfn->dev; + struct device *dev; if (!nd_pfn) return NULL; -- cgit v1.2.3 From 327d53d005ca47b10eae940616ed11c569f75a9b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Mar 2018 22:03:10 -0700 Subject: selftests/x86/entry_from_vm86: Exit with 1 if we fail Fix a logic error that caused the test to exit with 0 even if test cases failed. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Cc: bartoldeman@gmail.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/b1cc37144038958a469c8f70a5f47a6a5638636a.1521003603.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/entry_from_vm86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 361466a2eaef..6e85f0d0498d 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -318,7 +318,7 @@ int main(void) clearhandler(SIGSEGV); /* Make sure nothing explodes if we fork. */ - if (fork() > 0) + if (fork() == 0) return 0; return (nerrs == 0 ? 0 : 1); -- cgit v1.2.3 From 78393fdde2a456cafa414b171c90f26a3df98b20 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Mar 2018 22:03:11 -0700 Subject: selftests/x86/entry_from_vm86: Add test cases for POPF POPF is currently broken -- add tests to catch the error. This results in: [RUN] POPF with VIP set and IF clear from vm86 mode [INFO] Exited vm86 mode due to STI [FAIL] Incorrect return reason (started at eip = 0xd, ended at eip = 0xf) because POPF currently fails to check IF before reporting a pending interrupt. This patch also makes the FAIL message a bit more informative. Reported-by: Bart Oldeman Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/a16270b5cfe7832d6d00c479d0f871066cbdb52b.1521003603.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/entry_from_vm86.c | 30 ++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 6e85f0d0498d..ade443a88421 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -95,6 +95,10 @@ asm ( "int3\n\t" "vmcode_int80:\n\t" "int $0x80\n\t" + "vmcode_popf_hlt:\n\t" + "push %ax\n\t" + "popf\n\t" + "hlt\n\t" "vmcode_umip:\n\t" /* addressing via displacements */ "smsw (2052)\n\t" @@ -124,8 +128,8 @@ asm ( extern unsigned char vmcode[], end_vmcode[]; extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], - vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[], - vmcode_umip_str[], vmcode_umip_sldt[]; + vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[], + vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[]; /* Returns false if the test was skipped. */ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, @@ -175,7 +179,7 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { printf("[OK]\tReturned correctly\n"); } else { - printf("[FAIL]\tIncorrect return reason\n"); + printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip); nerrs++; } @@ -264,6 +268,9 @@ int main(void) v86.regs.ds = load_addr / 16; v86.regs.es = load_addr / 16; + /* Use the end of the page as our stack. */ + v86.regs.esp = 4096; + assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ /* #BR -- should deliver SIG??? */ @@ -295,6 +302,23 @@ int main(void) v86.regs.eflags &= ~X86_EFLAGS_IF; do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + /* POPF with VIP set but IF clear: should not trap */ + v86.regs.eflags = X86_EFLAGS_VIP; + v86.regs.eax = 0; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear"); + + /* POPF with VIP set and IF set: should trap */ + v86.regs.eflags = X86_EFLAGS_VIP; + v86.regs.eax = X86_EFLAGS_IF; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set"); + + /* POPF with VIP clear and IF set: should not trap */ + v86.regs.eflags = 0; + v86.regs.eax = X86_EFLAGS_IF; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set"); + + v86.regs.eflags = 0; + /* INT3 -- should cause #BP */ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); -- cgit v1.2.3 From b5069782453459f6ec1fdeb495d9901a4545fcb5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Mar 2018 22:03:12 -0700 Subject: x86/vm86/32: Fix POPF emulation POPF would trap if VIP was set regardless of whether IF was set. Fix it. Suggested-by: Stas Sergeev Reported-by: Bart Oldeman Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 5ed92a8ab71f ("x86/vm86: Use the normal pt_regs area for vm86") Link: http://lkml.kernel.org/r/ce95f40556e7b2178b6bc06ee9557827ff94bd28.1521003603.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/vm86_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5edb27f1a2c4..9d0b5af7db91 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) return; check_vip: - if (VEFLAGS & X86_EFLAGS_VIP) { + if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == + (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { save_v86_state(regs, VM86_STI); return; } -- cgit v1.2.3 From 9bdc00a5c16f5d5306756afdbe2811be4061c945 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 14 Mar 2018 17:43:12 +1030 Subject: ARM: dts: aspeed: Add default memory node When we removed the inclusion of skeleton.dtsi from the device trees, we broke booting for systems with bootloaders that aren't device tre aware. This can be seen, for example, when appending the device tree blob to the kernel image. The reason booting broke was that the kernel lacked the device_type label in the memory node. Add in a default memory node wth the device_type. It can contain the memory address as the location is fixed for each SoC generation, but the size needs to be added by the bootloader or the board specific dts. Fixes: 73102d6fdc32 ("ARM: dts: aspeed: Remove skeleton.dtsi") Cc: Reported-by: Benjamin Herrenschmidt Signed-off-by: Joel Stanley Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/aspeed-g4.dtsi | 5 +++++ arch/arm/boot/dts/aspeed-g5.dtsi | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index b0d8431a3700..ae2b8c952e80 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -42,6 +42,11 @@ }; }; + memory@40000000 { + device_type = "memory"; + reg = <0x40000000 0>; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index 40de3b66c33f..2477ebc11d9d 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -42,6 +42,11 @@ }; }; + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0>; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; -- cgit v1.2.3 From e4b79900222b8cccd4da4a7a89581f0e1b764ed2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 13 Mar 2018 15:58:11 +1100 Subject: powerpc/64s: Fix NULL AT_BASE_PLATFORM when using DT CPU features When running virtualised the powerpc kernel is able to run the system in "compat mode" - which means the kernel and hardware are pretending to userspace that the CPU is an older version than it actually is. AT_BASE_PLATFORM is an AUXV entry that we export to userspace for use when we're running in that mode, which tells userspace the "platform" string for the real CPU version, as opposed to the faked version. Although we don't support compat mode when using DT CPU features, and arguably don't need to set AT_BASE_PLATFORM, the existing cputable based code always sets it even when we're running bare metal. That means the lack of AT_BASE_PLATFORM is a user-visible artifact of the fact that the kernel is using DT CPU features, which we don't want. So set it in the DT CPU features code also. This results in eg: $ LD_SHOW_AUXV=1 /bin/true | grep "AT_.*PLATFORM" AT_PLATFORM: power9 AT_BASE_PLATFORM:power9 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 945e2c29ad2d..0bcfb0f256e1 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -720,6 +720,9 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; } + /* Make sure powerpc_base_platform is non-NULL */ + powerpc_base_platform = cur_cpu_spec->platform; + system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); -- cgit v1.2.3 From f4353daf4905c0099fd25fa742e2ffd4a4bab26a Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:56 +0000 Subject: can: cc770: Fix stalls on rt-linux, remove redundant IRQ ack This has been reported to cause stalls on rt-linux. Suggested-by: Richard Weinberger Tested-by: Richard Weinberger Signed-off-by: Andri Yngvason Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 1e37313054f3..9fed163262e0 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -447,15 +447,6 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += dlc; - - /* - * HM: We had some cases of repeated IRQs so make sure the - * INT is acknowledged I know it's already further up, but - * doing again fixed the issue - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); - return NETDEV_TX_OK; } @@ -684,12 +675,6 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); - /* - * We had some cases of repeated IRQ so make sure the - * INT is acknowledged - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); stats->tx_packets++; can_get_echo_skb(dev, 0); -- cgit v1.2.3 From 746201235b3f876792099079f4c6fea941d76183 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:57 +0000 Subject: can: cc770: Fix queue stall & dropped RTR reply While waiting for the TX object to send an RTR, an external message with a matching id can overwrite the TX data. In this case we must call the rx routine and then try transmitting the message that was overwritten again. The queue was being stalled because the RX event did not generate an interrupt to wake up the queue again and the TX event did not happen because the TXRQST flag is reset by the chip when new data is received. According to the CC770 datasheet the id of a message object should not be changed while the MSGVAL bit is set. This has been fixed by resetting the MSGVAL bit before modifying the object in the transmit function and setting it after. It is not enough to set & reset CPUUPD. It is important to keep the MSGVAL bit reset while the message object is being modified. Otherwise, during RTR transmission, a frame with matching id could trigger an rx-interrupt, which would cause a race condition between the interrupt routine and the transmit function. Signed-off-by: Andri Yngvason Tested-by: Richard Weinberger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 94 ++++++++++++++++++++++++++++++------------- drivers/net/can/cc770/cc770.h | 2 + 2 files changed, 68 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 9fed163262e0..2743d82d4424 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -390,37 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev, return 0; } -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void cc770_tx(struct net_device *dev, int mo) { struct cc770_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct can_frame *cf = (struct can_frame *)skb->data; - unsigned int mo = obj2msgobj(CC770_OBJ_TX); + struct can_frame *cf = (struct can_frame *)priv->tx_skb->data; u8 dlc, rtr; u32 id; int i; - if (can_dropped_invalid_skb(dev, skb)) - return NETDEV_TX_OK; - - if ((cc770_read_reg(priv, - msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { - netdev_err(dev, "TX register is still occupied!\n"); - return NETDEV_TX_BUSY; - } - - netif_stop_queue(dev); - dlc = cf->can_dlc; id = cf->can_id; - if (cf->can_id & CAN_RTR_FLAG) - rtr = 0; - else - rtr = MSGCFG_DIR; + rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR; + + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); cc770_write_reg(priv, msgobj[mo].ctrl1, RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES); - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES); + if (id & CAN_EFF_FLAG) { id &= CAN_EFF_MASK; cc770_write_reg(priv, msgobj[mo].config, @@ -439,13 +425,30 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < dlc; i++) cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]); - /* Store echo skb before starting the transfer */ - can_put_echo_skb(skb, dev, 0); - cc770_write_reg(priv, msgobj[mo].ctrl1, - RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC); +} + +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct cc770_priv *priv = netdev_priv(dev); + unsigned int mo = obj2msgobj(CC770_OBJ_TX); + + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; - stats->tx_bytes += dlc; + netif_stop_queue(dev); + + if ((cc770_read_reg(priv, + msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { + netdev_err(dev, "TX register is still occupied!\n"); + return NETDEV_TX_BUSY; + } + + priv->tx_skb = skb; + cc770_tx(dev, mo); return NETDEV_TX_OK; } @@ -671,13 +674,47 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) struct cc770_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; unsigned int mo = obj2msgobj(o); + struct can_frame *cf; + u8 ctrl1; + + ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1); - /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); + cc770_write_reg(priv, msgobj[mo].ctrl1, + RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES); - stats->tx_packets++; + if (unlikely(!priv->tx_skb)) { + netdev_err(dev, "missing tx skb in tx interrupt\n"); + return; + } + + if (unlikely(ctrl1 & MSGLST_SET)) { + stats->rx_over_errors++; + stats->rx_errors++; + } + + /* When the CC770 is sending an RTR message and it receives a regular + * message that matches the id of the RTR message, it will overwrite the + * outgoing message in the TX register. When this happens we must + * process the received message and try to transmit the outgoing skb + * again. + */ + if (unlikely(ctrl1 & NEWDAT_SET)) { + cc770_rx(dev, mo, ctrl1); + cc770_tx(dev, mo); + return; + } + + can_put_echo_skb(priv->tx_skb, dev, 0); can_get_echo_skb(dev, 0); + + cf = (struct can_frame *)priv->tx_skb->data; + stats->tx_bytes += cf->can_dlc; + stats->tx_packets++; + + priv->tx_skb = NULL; + netif_wake_queue(dev); } @@ -789,6 +826,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv) priv->can.do_set_bittiming = cc770_set_bittiming; priv->can.do_set_mode = cc770_set_mode; priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; + priv->tx_skb = NULL; memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags)); diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h index a1739db98d91..95752e1d1283 100644 --- a/drivers/net/can/cc770/cc770.h +++ b/drivers/net/can/cc770/cc770.h @@ -193,6 +193,8 @@ struct cc770_priv { u8 cpu_interface; /* CPU interface register */ u8 clkout; /* Clock out register */ u8 bus_config; /* Bus conffiguration register */ + + struct sk_buff *tx_skb; }; struct net_device *alloc_cc770dev(int sizeof_priv); -- cgit v1.2.3 From a14bff131108faf50cc0cf864589fd71ee216c96 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 14 Mar 2018 11:24:27 +0000 Subject: x86/speculation, objtool: Annotate indirect calls/jumps for objtool on 32-bit kernels In the following commit: 9e0e3c5130e9 ("x86/speculation, objtool: Annotate indirect calls/jumps for objtool") ... we added annotations for CALL_NOSPEC/JMP_NOSPEC on 64-bit x86 kernels, but we did not annotate the 32-bit path. Annotate it similarly. Signed-off-by: Andy Whitcroft Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180314112427.22351-1-apw@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b7063cfa19f9..b3996d60f981 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -183,7 +183,10 @@ * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ -# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ +# define CALL_NOSPEC \ + ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ -- cgit v1.2.3 From ca6bfcb2f6d9deab3924bf901e73622a94900473 Mon Sep 17 00:00:00 2001 From: Ju Hyung Park Date: Sun, 11 Mar 2018 02:28:35 +0900 Subject: libata: Enable queued TRIM for Samsung SSD 860 Samsung explicitly states that queued TRIM is supported for Linux with 860 PRO and 860 EVO. Make the previous blacklist to cover only 840 and 850 series. Signed-off-by: Park Ju Hyung Reviewed-by: Martin K. Petersen Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cb789f8849ae..aec609f80c4e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4549,7 +4549,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, -- cgit v1.2.3 From af1d830bf32b27b387b97c8b29dc09e306a9ff7f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 14 Mar 2018 10:24:20 -0500 Subject: jump_label: Fix sparc64 warning The kbuild test robot reported the following warning on sparc64: kernel/jump_label.c: In function '__jump_label_update': kernel/jump_label.c:376:51: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code); On sparc64, the jump_label entry->code field is of type u32, but pointers are 64-bit. Silence the warning by casting entry->code to an unsigned long before casting it to a pointer. This is also what the sparc jump label code does. Fixes: dc1dd184c2f0 ("jump_label: Warn on failed jump_label patching attempt") Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Jason Baron Cc: Borislav Petkov Cc: "David S . Miller" Link: https://lkml.kernel.org/r/c966fed42be6611254a62d46579ec7416548d572.1521041026.git.jpoimboe@redhat.com --- kernel/jump_label.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 52a0a7af8640..e7214093dcd1 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -373,7 +373,8 @@ static void __jump_label_update(struct static_key *key, if (kernel_text_address(entry->code)) arch_jump_label_transform(entry, jump_label_type(entry)); else - WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code); + WARN_ONCE(1, "can't patch jump_label at %pS", + (void *)(unsigned long)entry->code); } } } -- cgit v1.2.3 From 4dcb31d4649df36297296b819437709f5407059c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 09:04:16 -0700 Subject: net: use skb_to_full_sk() in skb_update_prio() Andrei Vagin reported a KASAN: slab-out-of-bounds error in skb_update_prio() Since SYNACK might be attached to a request socket, we need to get back to the listener socket. Since this listener is manipulated without locks, add const qualifiers to sock_cgroup_prioidx() so that the const can also be used in skb_update_prio() Also add the const qualifier to sock_cgroup_classid() for consistency. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Reported-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 4 ++-- net/core/dev.c | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..f8e76d01a5ad 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -755,13 +755,13 @@ struct sock_cgroup_data { * updaters and return part of the previous pointer as the prioidx or * classid. Such races are short-lived and the result isn't critical. */ -static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { /* fallback to 1 which is always the ID of the root cgroup */ return (skcd->is_data & 1) ? skcd->prioidx : 1; } -static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { /* fallback to 0 which is the unconfigured default classid */ return (skcd->is_data & 1) ? skcd->classid : 0; diff --git a/net/core/dev.c b/net/core/dev.c index 2cedf520cb28..12be20535714 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3278,15 +3278,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { - struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); + const struct netprio_map *map; + const struct sock *sk; + unsigned int prioidx; - if (!skb->priority && skb->sk && map) { - unsigned int prioidx = - sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); + if (skb->priority) + return; + map = rcu_dereference_bh(skb->dev->priomap); + if (!map) + return; + sk = skb_to_full_sk(skb); + if (!sk) + return; - if (prioidx < map->priomap_len) - skb->priority = map->priomap[prioidx]; - } + prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data); + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; } #else #define skb_update_prio(skb) -- cgit v1.2.3 From 96f413f47677366e0ae03797409bfcc4151dbf9e Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 14 Mar 2018 08:37:28 -0500 Subject: soc/fsl/qbman: fix issue in qman_delete_cgr_safe() The wait_for_completion() call in qman_delete_cgr_safe() was triggering a scheduling while atomic bug, replacing the kthread with a smp_call_function_single() call to fix it. Signed-off-by: Madalin Bucur Signed-off-by: Roy Pledge Signed-off-by: David S. Miller --- drivers/soc/fsl/qbman/qman.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index e4f5bb056fd2..ba3cfa8e279b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2443,39 +2443,21 @@ struct cgr_comp { struct completion completion; }; -static int qman_delete_cgr_thread(void *p) +static void qman_delete_cgr_smp_call(void *p) { - struct cgr_comp *cgr_comp = (struct cgr_comp *)p; - int ret; - - ret = qman_delete_cgr(cgr_comp->cgr); - complete(&cgr_comp->completion); - - return ret; + qman_delete_cgr((struct qman_cgr *)p); } void qman_delete_cgr_safe(struct qman_cgr *cgr) { - struct task_struct *thread; - struct cgr_comp cgr_comp; - preempt_disable(); if (qman_cgr_cpus[cgr->cgrid] != smp_processor_id()) { - init_completion(&cgr_comp.completion); - cgr_comp.cgr = cgr; - thread = kthread_create(qman_delete_cgr_thread, &cgr_comp, - "cgr_del"); - - if (IS_ERR(thread)) - goto out; - - kthread_bind(thread, qman_cgr_cpus[cgr->cgrid]); - wake_up_process(thread); - wait_for_completion(&cgr_comp.completion); + smp_call_function_single(qman_cgr_cpus[cgr->cgrid], + qman_delete_cgr_smp_call, cgr, true); preempt_enable(); return; } -out: + qman_delete_cgr(cgr); preempt_enable(); } -- cgit v1.2.3 From 88075256ee817041d68c2387f29065b5cb2b342a Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 14 Mar 2018 08:37:29 -0500 Subject: dpaa_eth: fix error in dpaa_remove() The recent changes that make the driver probing compatible with DSA were not propagated in the dpa_remove() function, breaking the module unload function. Using the proper device to address the issue. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 7caa8da48421..3af5e0c08233 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2860,7 +2860,7 @@ static int dpaa_remove(struct platform_device *pdev) struct device *dev; int err; - dev = &pdev->dev; + dev = pdev->dev.parent; net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev); -- cgit v1.2.3 From 565186362b73226a288830abe595f05f0cec0bbc Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:30 -0500 Subject: dpaa_eth: remove duplicate initialization The fd_format has already been initialized at this point. Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 3af5e0c08233..627f7f714b18 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2278,7 +2278,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, vaddr = phys_to_virt(addr); prefetch(vaddr + qm_fd_get_offset(fd)); - fd_format = qm_fd_get_format(fd); /* The only FD types that we may receive are contig and S/G */ WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg)); -- cgit v1.2.3 From e4d1b37c17d000a3da9368a3e260fb9ea4927c25 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:31 -0500 Subject: dpaa_eth: increment the RX dropped counter when needed Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 627f7f714b18..6c0679fd78b8 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2310,8 +2310,10 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, skb_len = skb->len; - if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) { + percpu_stats->rx_dropped++; return qman_cb_dqrr_consume; + } percpu_stats->rx_packets++; percpu_stats->rx_bytes += skb_len; -- cgit v1.2.3 From 82d141cd19d088ee41feafde4a6f86eeb40d93c5 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:32 -0500 Subject: dpaa_eth: remove duplicate increment of the tx_errors counter The tx_errors counter is incremented by the dpaa_xmit caller. Signed-off-by: Camelia Groza Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6c0679fd78b8..e4ec32a9ca15 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2008,7 +2008,6 @@ static inline int dpaa_xmit(struct dpaa_priv *priv, } if (unlikely(err < 0)) { - percpu_stats->tx_errors++; percpu_stats->tx_fifo_errors++; return err; } -- cgit v1.2.3 From d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Mar 2018 10:21:14 +0100 Subject: ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu Prior to the rework of PMTU information storage in commit 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu. Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets. This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment(). One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered. Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/net/ip.h | 11 +++++++++-- include/net/ip_fib.h | 1 + include/net/route.h | 3 ++- net/ipv4/route.c | 26 +++++++++++++++++++------- net/ipv4/xfrm4_policy.c | 1 + 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 746abff9ce51..f49b3a576bec 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -328,6 +328,13 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline int ip_mtu_locked(const struct dst_entry *dst) +{ + const struct rtable *rt = (const struct rtable *)dst; + + return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); +} + static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { @@ -335,7 +342,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU))); + !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) @@ -361,7 +368,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); if (net->ipv4.sysctl_ip_fwd_use_pmtu || - dst_metric_locked(dst, RTAX_MTU) || + ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f80524396c06..77d0a78cf7d2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; + bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; diff --git a/include/net/route.h b/include/net/route.h index 40b870d58f38..20a92ca9e115 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -63,7 +63,8 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_pmtu; + u32 rt_mtu_locked:1, + rt_pmtu:31; u32 rt_table_id; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f9dbb8cb66bf..299e247b2032 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr) static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) { rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; rt->dst.expires = fnhe->fnhe_expires; if (fnhe->fnhe_gw) { @@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, unsigned long expires) + u32 pmtu, bool lock, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) + if (pmtu) { fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; + } fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); @@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = expires; /* Exception created; mark the cached routes for the nexthop @@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, jiffies + ip_rt_gc_timeout); + 0, false, + jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; struct fib_result res; + bool lock = false; - if (dst_metric_locked(dst, RTAX_MTU)) + if (ip_mtu_locked(dst)) return; if (ipv4_mtu(dst) < mtu) return; - if (mtu < ip_rt_min_pmtu) + if (mtu < ip_rt_min_pmtu) { + lock = true; mtu = ip_rt_min_pmtu; + } if (rt->rt_pmtu == mtu && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) @@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, + update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = READ_ONCE(dst->dev->mtu); - if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { + if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_is_input = 0; rt->rt_iif = 0; rt->rt_pmtu = 0; + rt->rt_mtu_locked = 0; rt->rt_gateway = 0; rt->rt_uses_gateway = 0; rt->rt_table_id = 0; @@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; + rt->rt_mtu_locked = ort->rt_mtu_locked; rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; @@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; + if (rt->rt_mtu_locked && expires) + metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8d33f7b311f4..fbebda67ac1b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); rt_add_uncached_list(&xdst->u.rt); -- cgit v1.2.3 From ea91df6d8a55836c7401eb6d5d4d828b659d8d67 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Wed, 14 Mar 2018 12:36:25 -0400 Subject: tg3: prevent scheduling while atomic splat The problem was introduced in commit 506b0a395f26 ("[netdrv] tg3: APE heartbeat changes"). The bug occurs because tp->lock spinlock is held which is obtained in tg3_start by way of tg3_full_lock(), line 11571. The documentation for usleep_range() specifically states it cannot be used inside a spinlock. Fixes: 506b0a395f26 ("[netdrv] tg3: APE heartbeat changes") Signed-off-by: Jonathan Toppins Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index c1841db1b500..f2593978ae75 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us) tg3_ape_unlock(tp, TG3_APE_LOCK_MEM); - usleep_range(10, 20); + udelay(10); timeout_us -= (timeout_us > 10) ? 10 : timeout_us; } -- cgit v1.2.3 From cf55612a945039476abfd73e39064b2e721c3272 Mon Sep 17 00:00:00 2001 From: Cathy Zhou Date: Wed, 14 Mar 2018 10:56:07 -0700 Subject: sunvnet: does not support GSO for sctp The NETIF_F_GSO_SOFTWARE implies support for GSO on SCTP, but the sunvnet driver does not support GSO for sctp. Here we remove the NETIF_F_GSO_SOFTWARE feature flag and only report NETIF_F_ALL_TSO instead. Signed-off-by: Cathy Zhou Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 63d3d6b215f3..a94f50442613 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac, dev->ethtool_ops = &vnet_ethtool_ops; dev->watchdog_timeo = VNET_TX_TIMEOUT; - dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | + dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; -- cgit v1.2.3 From 75073a64a98cecf4b2a81567b31b5a52dd3518bc Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Sat, 10 Mar 2018 16:12:16 -0800 Subject: platform/x86: dell-smbios: Resolve dependency error on ACPI_WMI Similarly to DCDBAS for DELL_SMBIOS_SMM, if DELL_SMBIOS_WMI is enabled, DELL_SMBIOS becomes dependent on ACPI_WMI. Update the depends lines to prevent a configuration where DELL_SMBIOS=y and either backend dependency =m. Update the comment accordingly. Cc: Mario Limonciello Cc: Andy Shevchenko Cc: Dominik Brodowski Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index d10ffe51da24..51ebc5a6053f 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -106,13 +106,14 @@ config ASUS_LAPTOP If you have an ACPI-compatible ASUS laptop, say Y or M here. # -# If the DELL_SMBIOS_SMM feature is enabled, the DELL_SMBIOS driver -# becomes dependent on the DCDBAS driver. The "depends" line prevents a -# configuration where DELL_SMBIOS=y while DCDBAS=m. +# The DELL_SMBIOS driver depends on ACPI_WMI and/or DCDBAS if those +# backends are selected. The "depends" line prevents a configuration +# where DELL_SMBIOS=y while either of those dependencies =m. # config DELL_SMBIOS tristate "Dell SMBIOS driver" depends on DCDBAS || DCDBAS=n + depends on ACPI_WMI || ACPI_WMI=n ---help--- This provides support for the Dell SMBIOS calling interface. If you have a Dell computer you should enable this option. -- cgit v1.2.3 From 49368c13217f3b4d2e2d168e88b16c74910ad0ae Mon Sep 17 00:00:00 2001 From: "Darren Hart (VMware)" Date: Mon, 12 Mar 2018 23:28:00 -0700 Subject: platform/x86: Fix dell driver init order Update the initcall ordering to satisfy the following dependency ordering: 1. DCDBAS, ACPI_WMI 2. DELL_SMBIOS, DELL_RBTN 3. DELL_LAPTOP, DELL_WMI By assigning them to the following initcall levels: subsys_initcall: DCDBAS, ACPI_WMI module_init: DELL_SMBIOS, DELL_RBTN late_initcall: DELL_LAPTOP, DELL_WMI Cc: Dominik Brodowski Cc: Mario.Limonciello@dell.com Signed-off-by: Darren Hart (VMware) --- drivers/firmware/dcdbas.c | 2 +- drivers/platform/x86/dell-smbios-base.c | 2 +- drivers/platform/x86/dell-wmi.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index c16600f30611..0bdea60c65dd 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -639,7 +639,7 @@ static void __exit dcdbas_exit(void) platform_driver_unregister(&dcdbas_driver); } -module_init(dcdbas_init); +subsys_initcall_sync(dcdbas_init); module_exit(dcdbas_exit); MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")"); diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index 5bcf8a18f785..2485c80a9fdd 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -637,7 +637,7 @@ static void __exit dell_smbios_exit(void) mutex_unlock(&smbios_mutex); } -subsys_initcall(dell_smbios_init); +module_init(dell_smbios_init); module_exit(dell_smbios_exit); MODULE_AUTHOR("Matthew Garrett "); diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 2c9927430d85..8d102195a392 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -714,7 +714,7 @@ static int __init dell_wmi_init(void) return wmi_driver_register(&dell_wmi_driver); } -module_init(dell_wmi_init); +late_initcall(dell_wmi_init); static void __exit dell_wmi_exit(void) { -- cgit v1.2.3 From 62b06f8f429cd233e4e2e7bbd21081ad60c9018f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 6 Mar 2018 09:21:06 +0000 Subject: KVM: arm/arm64: vgic: Add missing irq_lock to vgic_mmio_read_pending Our irq_is_pending() helper function accesses multiple members of the vgic_irq struct, so we need to hold the lock when calling it. Add that requirement as a comment to the definition and take the lock around the call in vgic_mmio_read_pending(), where we were missing it before. Fixes: 96b298000db4 ("KVM: arm/arm64: vgic-new: Add PENDING registers handlers") Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 3 +++ virt/kvm/arm/vgic/vgic.h | 1 + 2 files changed, 4 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 83d82bd7dc4e..dbe99d635c80 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + unsigned long flags; + spin_lock_irqsave(&irq->irq_lock, flags); if (irq_is_pending(irq)) value |= (1U << i); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 12c37b89f7a3..5b11859a1a1e 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -96,6 +96,7 @@ /* we only support 64 kB translation table page size */ #define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) +/* Requires the irq_lock to be held by the caller. */ static inline bool irq_is_pending(struct vgic_irq *irq) { if (irq->config == VGIC_CONFIG_EDGE) -- cgit v1.2.3 From e21a4f3a930cda6e4902cb5b3213365e5ff3ce7c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 27 Feb 2018 12:33:50 +0100 Subject: KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN Calling vcpu_load() registers preempt notifiers for this vcpu and calls kvm_arch_vcpu_load(). The latter will soon be doing a lot of heavy lifting on arm/arm64 and will try to do things such as enabling the virtual timer and setting us up to handle interrupts from the timer hardware. Loading state onto hardware registers and enabling hardware to signal interrupts can be problematic when we're not actually about to run the VCPU, because it makes it difficult to establish the right context when handling interrupts from the timer, and it makes the register access code difficult to reason about. Luckily, now when we call vcpu_load in each ioctl implementation, we can simply remove the call from the non-KVM_RUN vcpu ioctls, and our kvm_arch_vcpu_load() is only used for loading vcpu content to the physical CPU when we're actually going to run the vcpu. Cc: stable@vger.kernel.org Fixes: 9b062471e52a ("KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl") Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 3 --- virt/kvm/arm/arm.c | 9 --------- 2 files changed, 12 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d7e3299a7734..959e50d2588c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - trace_kvm_set_guest_debug(vcpu, dbg->control); if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { @@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } out: - vcpu_put(vcpu); return ret; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 86941f6181bb..53572304843b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu_load(vcpu); - if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; - vcpu_put(vcpu); return 0; } @@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } - vcpu_put(vcpu); return ret; } @@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_device_attr attr; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; @@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; } - vcpu_put(vcpu); return r; } -- cgit v1.2.3 From 413aa807ae39fed7e387c175d2d0ae9fcf6c0c9d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 5 Mar 2018 11:36:38 +0100 Subject: KVM: arm/arm64: Reset mapped IRQs on VM reset We currently don't allow resetting mapped IRQs from userspace, because their state is controlled by the hardware. But we do need to reset the state when the VM is reset, so we provide a function for the 'owner' of the mapped interrupt to reset the interrupt state. Currently only the timer uses mapped interrupts, so we call this function from the timer reset logic. Cc: stable@vger.kernel.org Fixes: 4c60e360d6df ("KVM: arm/arm64: Provide a get_input_level for the arch timer") Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/arch_timer.c | 4 ++++ virt/kvm/arm/vgic/vgic.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdbd142ca7f2..02924ae2527e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -360,6 +360,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu); bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70f4c30918eb..3945021510a9 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -581,6 +581,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -594,6 +595,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) ptimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); + if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) + kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); + return 0; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c7c5ef190afa..0001858a2c23 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, return ret; } +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. + */ +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (!irq->hw) + goto out; + + spin_lock_irqsave(&irq->irq_lock, flags); + irq->active = false; + irq->pending_latch = false; + irq->line_level = false; + spin_unlock_irqrestore(&irq->irq_lock, flags); +out: + vgic_put_irq(vcpu->kvm, irq); +} + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) { struct vgic_irq *irq; -- cgit v1.2.3 From 76600428c3677659e3c3633bb4f2ea302220a275 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 2 Mar 2018 08:16:30 +0000 Subject: KVM: arm/arm64: Reduce verbosity of KVM init log On my GICv3 system, the following is printed to the kernel log at boot: kvm [1]: 8-bit VMID kvm [1]: IDMAP page: d20e35000 kvm [1]: HYP VA range: 800000000000:ffffffffffff kvm [1]: vgic-v2@2c020000 kvm [1]: GIC system register CPU interface enabled kvm [1]: vgic interrupt IRQ1 kvm [1]: virtual timer IRQ4 kvm [1]: Hyp mode initialized successfully The KVM IDMAP is a mapping of a statically allocated kernel structure, and so printing its physical address leaks the physical placement of the kernel when physical KASLR in effect. So change the kvm_info() to kvm_debug() to remove it from the log output. While at it, trim the output a bit more: IRQ numbers can be found in /proc/interrupts, and the HYP VA and vgic-v2 lines are not highly informational either. Cc: Acked-by: Will Deacon Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/mmu.c | 6 +++--- virt/kvm/arm/vgic/vgic-v2.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 3945021510a9..282389eb204f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -771,7 +771,7 @@ int kvm_timer_hyp_init(bool has_gic) static_branch_enable(&has_gic_active_state); } - kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); + kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "kvm/arm/timer:starting", kvm_timer_starting_cpu, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ec62d1cccab7..b960acdd0c05 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1810,9 +1810,9 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - kvm_info("IDMAP page: %lx\n", hyp_idmap_start); - kvm_info("HYP VA range: %lx:%lx\n", - kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && hyp_idmap_start < kern_hyp_va(~0UL) && diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index c32d7b93ffd1..e9d840a75e7b 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -410,7 +410,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.type = VGIC_V2; kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; - kvm_info("vgic-v2@%llx\n", info->vctrl.start); + kvm_debug("vgic-v2@%llx\n", info->vctrl.start); return 0; out: -- cgit v1.2.3 From 16ca6a607d84bef0129698d8d808f501afd08d43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Mar 2018 21:48:01 +0000 Subject: KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid The vgic code is trying to be clever when injecting GICv2 SGIs, and will happily populate LRs with the same interrupt number if they come from multiple vcpus (after all, they are distinct interrupt sources). Unfortunately, this is against the letter of the architecture, and the GICv2 architecture spec says "Each valid interrupt stored in the List registers must have a unique VirtualID for that virtual CPU interface.". GICv3 has similar (although slightly ambiguous) restrictions. This results in guests locking up when using GICv2-on-GICv3, for example. The obvious fix is to stop trying so hard, and inject a single vcpu per SGI per guest entry. After all, pending SGIs with multiple source vcpus are pretty rare, and are mostly seen in scenario where the physical CPUs are severely overcomitted. But as we now only inject a single instance of a multi-source SGI per vcpu entry, we may delay those interrupts for longer than strictly necessary, and run the risk of injecting lower priority interrupts in the meantime. In order to address this, we adopt a three stage strategy: - If we encounter a multi-source SGI in the AP list while computing its depth, we force the list to be sorted - When populating the LRs, we prevent the injection of any interrupt of lower priority than that of the first multi-source SGI we've injected. - Finally, the injection of a multi-source SGI triggers the request of a maintenance interrupt when there will be no pending interrupt in the LRs (HCR_NPIE). At the point where the last pending interrupt in the LRs switches from Pending to Active, the maintenance interrupt will be delivered, allowing us to add the remaining SGIs using the same process. Cc: stable@vger.kernel.org Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework") Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 + include/linux/irqchip/arm-gic.h | 1 + virt/kvm/arm/vgic/vgic-v2.c | 9 +++++- virt/kvm/arm/vgic/vgic-v3.c | 9 +++++- virt/kvm/arm/vgic/vgic.c | 61 +++++++++++++++++++++++++++++--------- virt/kvm/arm/vgic/vgic.h | 2 ++ 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c00c4c33e432..b26eccc78fb1 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -503,6 +503,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) #define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index d3453ee072fc..68d8b1f73682 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -84,6 +84,7 @@ #define GICH_HCR_EN (1 << 0) #define GICH_HCR_UIE (1 << 1) +#define GICH_HCR_NPIE (1 << 3) #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e9d840a75e7b..29556f71b691 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void) vgic_v2_write_lr(i, 0); } +void vgic_v2_set_npie(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + + cpuif->vgic_hcr |= GICH_HCR_NPIE; +} + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; @@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~GICH_HCR_UIE; + cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b329414e57a..0ff2006f3781 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -26,6 +26,13 @@ static bool group1_trap; static bool common_trap; static bool gicv4_enable; +void vgic_v3_set_npie(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_NPIE; +} + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 0001858a2c23..8201899126f6 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -710,22 +710,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) vgic_v3_set_underflow(vcpu); } +static inline void vgic_set_npie(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_npie(vcpu); + else + vgic_v3_set_npie(vcpu); +} + /* Requires the ap_list_lock to be held. */ -static int compute_ap_list_depth(struct kvm_vcpu *vcpu) +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; int count = 0; + *multi_sgi = false; + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); /* GICv2 SGIs can count for more than one... */ - if (vgic_irq_is_sgi(irq->intid) && irq->source) - count += hweight8(irq->source); - else + if (vgic_irq_is_sgi(irq->intid) && irq->source) { + int w = hweight8(irq->source); + + count += w; + *multi_sgi |= (w > 1); + } else { count++; + } spin_unlock(&irq->irq_lock); } return count; @@ -736,28 +751,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; - int count = 0; + int count; + bool npie = false; + bool multi_sgi; + u8 prio = 0xff; DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) vgic_sort_ap_list(vcpu); + count = 0; + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); - if (unlikely(vgic_target_oracle(irq) != vcpu)) - goto next; - /* - * If we get an SGI with multiple sources, try to get - * them in all at once. + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. */ - do { + if (multi_sgi && irq->priority > prio) { + spin_unlock(&irq->irq_lock); + break; + } + + if (likely(vgic_target_oracle(irq) == vcpu)) { vgic_populate_lr(vcpu, irq, count++); - } while (irq->source && count < kvm_vgic_global_state.nr_lr); -next: + if (irq->source) { + npie = true; + prio = irq->priority; + } + } + spin_unlock(&irq->irq_lock); if (count == kvm_vgic_global_state.nr_lr) { @@ -768,6 +798,9 @@ next: } } + if (npie) + vgic_set_npie(vcpu); + vcpu->arch.vgic_cpu.used_lrs = count; /* Nuke remaining LRs */ diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 5b11859a1a1e..f5b8519e5546 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -160,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v2_set_npie(struct kvm_vcpu *vcpu); int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); @@ -189,6 +190,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v3_set_npie(struct kvm_vcpu *vcpu); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_enable(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 27e91ad1e746e341ca2312f29bccb9736be7b476 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Mar 2018 21:44:37 +0000 Subject: kvm: arm/arm64: vgic-v3: Tighten synchronization for guests using v2 on v3 On guest exit, and when using GICv2 on GICv3, we use a dsb(st) to force synchronization between the memory-mapped guest view and the system-register view that the hypervisor uses. This is incorrect, as the spec calls out the need for "a DSB whose required access type is both loads and stores with any Shareability attribute", while we're only synchronizing stores. We also lack an isb after the dsb to ensure that the latter has actually been executed before we start reading stuff from the sysregs. The fix is pretty easy: turn dsb(st) into dsb(sy), and slap an isb() just after. Cc: stable@vger.kernel.org Fixes: f68d2b1b73cc ("arm64: KVM: Implement vgic-v3 save/restore") Acked-by: Christoffer Dall Reviewed-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f5c3d6d7019e..b89ce5432214 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) * are now visible to the system register interface. */ if (!cpu_if->vgic_sre) { - dsb(st); + dsb(sy); + isb(); cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } -- cgit v1.2.3 From a6d7cff472eea87d96899a20fa718d2bab7109f3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:10:17 -0700 Subject: fs/aio: Add explicit RCU grace period when freeing kioctx While fixing refcounting, e34ecee2ae79 ("aio: Fix a trinity splat") incorrectly removed explicit RCU grace period before freeing kioctx. The intention seems to be depending on the internal RCU grace periods of percpu_ref; however, percpu_ref uses a different flavor of RCU, sched-RCU. This can lead to kioctx being freed while RCU read protected dereferences are still in progress. Fix it by updating free_ioctx() to go through call_rcu() explicitly. v2: Comment added to explain double bouncing. Signed-off-by: Tejun Heo Reported-by: Jann Horn Fixes: e34ecee2ae79 ("aio: Fix a trinity splat") Cc: Kent Overstreet Cc: Linus Torvalds Cc: stable@vger.kernel.org # v3.13+ --- fs/aio.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index a062d75109cb..eb2e0cfbe2d2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -115,7 +115,8 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct work_struct free_work; + struct rcu_head free_rcu; + struct work_struct free_work; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) return cancel(&kiocb->common); } +/* + * free_ioctx() should be RCU delayed to synchronize against the RCU + * protected lookup_ioctx() and also needs process context to call + * aio_free_ring(), so the double bouncing through kioctx->free_rcu and + * ->free_work. + */ static void free_ioctx(struct work_struct *work) { struct kioctx *ctx = container_of(work, struct kioctx, free_work); @@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work) kmem_cache_free(kioctx_cachep, ctx); } +static void free_ioctx_rcufn(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, free_rcu); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); @@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp); - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + /* Synchronize against RCU protected table->table[] dereferences */ + call_rcu(&ctx->free_rcu, free_ioctx_rcufn); } /* @@ -838,7 +853,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, table->table[ctx->id] = NULL; spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ + /* free_ioctx_reqs() will do the necessary RCU synchronization */ wake_up_all(&ctx->wait); /* -- cgit v1.2.3 From d0264c01e7587001a8c4608a5d1818dba9a4c11a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:10:17 -0700 Subject: fs/aio: Use RCU accessors for kioctx_table->table[] While converting ioctx index from a list to a table, db446a08c23d ("aio: convert the ioctx list to table lookup v3") missed tagging kioctx_table->table[] as an array of RCU pointers and using the appropriate RCU accessors. This introduces a small window in the lookup path where init and access may race. Mark kioctx_table->table[] with __rcu and use the approriate RCU accessors when using the field. Signed-off-by: Tejun Heo Reported-by: Jann Horn Fixes: db446a08c23d ("aio: convert the ioctx list to table lookup v3") Cc: Benjamin LaHaise Cc: Linus Torvalds Cc: stable@vger.kernel.org # v3.12+ --- fs/aio.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index eb2e0cfbe2d2..6bcd3fb5265a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -68,9 +68,9 @@ struct aio_ring { #define AIO_RING_PAGES 8 struct kioctx_table { - struct rcu_head rcu; - unsigned nr; - struct kioctx *table[]; + struct rcu_head rcu; + unsigned nr; + struct kioctx __rcu *table[]; }; struct kioctx_cpu { @@ -330,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) for (i = 0; i < table->nr; i++) { struct kioctx *ctx; - ctx = table->table[i]; + ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) { ctx->user_id = ctx->mmap_base = vma->vm_start; @@ -666,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) while (1) { if (table) for (i = 0; i < table->nr; i++) - if (!table->table[i]) { + if (!rcu_access_pointer(table->table[i])) { ctx->id = i; - table->table[i] = ctx; + rcu_assign_pointer(table->table[i], ctx); spin_unlock(&mm->ioctx_lock); /* While kioctx setup is in progress, @@ -849,8 +849,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, } table = rcu_dereference_raw(mm->ioctx_table); - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; + WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); + RCU_INIT_POINTER(table->table[ctx->id], NULL); spin_unlock(&mm->ioctx_lock); /* free_ioctx_reqs() will do the necessary RCU synchronization */ @@ -895,7 +895,8 @@ void exit_aio(struct mm_struct *mm) skipped = 0; for (i = 0; i < table->nr; ++i) { - struct kioctx *ctx = table->table[i]; + struct kioctx *ctx = + rcu_dereference_protected(table->table[i], true); if (!ctx) { skipped++; @@ -1084,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; - ctx = table->table[id]; + ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { percpu_ref_get(&ctx->users); ret = ctx; -- cgit v1.2.3 From 74b44bbe80b4c62113ac1501482ea1ee40eb9d67 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:10:18 -0700 Subject: RDMAVT: Fix synchronization around percpu_ref rvt_mregion uses percpu_ref for reference counting and RCU to protect accesses from lkey_table. When a rvt_mregion needs to be freed, it first gets unregistered from lkey_table and then rvt_check_refs() is called to wait for in-flight usages before the rvt_mregion is freed. rvt_check_refs() seems to have a couple issues. * It has a fast exit path which tests percpu_ref_is_zero(). However, a percpu_ref reading zero doesn't mean that the object can be released. In fact, the ->release() callback might not even have started executing yet. Proceeding with freeing can lead to use-after-free. * lkey_table is RCU protected but there is no RCU grace period in the free path. percpu_ref uses RCU internally but it's sched-RCU whose grace periods are different from regular RCU. Also, it generally isn't a good idea to depend on internal behaviors like this. To address the above issues, this patch removes the fast exit and adds an explicit synchronize_rcu(). Signed-off-by: Tejun Heo Acked-by: Dennis Dalessandro Cc: Mike Marciniszyn Cc: linux-rdma@vger.kernel.org Cc: Linus Torvalds --- drivers/infiniband/sw/rdmavt/mr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 1b2e5362a3ff..cc429b567d0a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) unsigned long timeout; struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); - if (percpu_ref_is_zero(&mr->refcount)) - return 0; - /* avoid dma mr */ - if (mr->lkey) + if (mr->lkey) { + /* avoid dma mr */ rvt_dereg_clean_qps(mr); + /* @mr was indexed on rcu protected @lkey_table */ + synchronize_rcu(); + } + timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); if (!timeout) { rvt_pr_err(rdi, -- cgit v1.2.3 From 18a955219bf7d9008ce480d4451b6b8bf4483a22 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 13 Mar 2018 11:03:46 -0600 Subject: x86/mm: Fix vmalloc_fault to use pXd_large Gratian Crisan reported that vmalloc_fault() crashes when CONFIG_HUGETLBFS is not set since the function inadvertently uses pXn_huge(), which always return 0 in this case. ioremap() does not depend on CONFIG_HUGETLBFS. Fix vmalloc_fault() to call pXd_large() instead. Fixes: f4eafd8bcd52 ("x86/mm: Fix vmalloc_fault() to handle large pages properly") Reported-by: Gratian Crisan Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Cc: Borislav Petkov Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180313170347.3829-2-toshi.kani@hpe.com --- arch/x86/mm/fault.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c88573d90f3e..25a30b5d6582 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) + if (pmd_large(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); @@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address) if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) BUG(); - if (pud_huge(*pud)) + if (pud_large(*pud)) return 0; pmd = pmd_offset(pud, address); @@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address) if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) BUG(); - if (pmd_huge(*pmd)) + if (pmd_large(*pmd)) return 0; pte_ref = pte_offset_kernel(pmd_ref, address); -- cgit v1.2.3 From f3f134f5260ae9ee1f5a4d0a8cc625c6c77655b4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Mar 2018 21:26:37 +0200 Subject: RDMA/mlx5: Fix crash while accessing garbage pointer and freed memory The failure in rereg_mr flow caused to set garbage value (error value) into mr->umem pointer. This pointer is accessed at the release stage and it causes to the following crash. There is not enough to simply change umem to point to NULL, because the MR struct is needed to be accessed during MR deregistration phase, so delay kfree too. [ 6.237617] BUG: unable to handle kernel NULL pointer dereference a 0000000000000228 [ 6.238756] IP: ib_dereg_mr+0xd/0x30 [ 6.239264] PGD 80000000167eb067 P4D 80000000167eb067 PUD 167f9067 PMD 0 [ 6.240320] Oops: 0000 [#1] SMP PTI [ 6.240782] CPU: 0 PID: 367 Comm: dereg Not tainted 4.16.0-rc1-00029-gc198fafe0453 #183 [ 6.242120] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 6.244504] RIP: 0010:ib_dereg_mr+0xd/0x30 [ 6.245253] RSP: 0018:ffffaf5d001d7d68 EFLAGS: 00010246 [ 6.246100] RAX: 0000000000000000 RBX: ffff95d4172daf00 RCX: 0000000000000000 [ 6.247414] RDX: 00000000ffffffff RSI: 0000000000000001 RDI: ffff95d41a317600 [ 6.248591] RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 [ 6.249810] R10: ffff95d417033c10 R11: 0000000000000000 R12: ffff95d4172c3a80 [ 6.251121] R13: ffff95d4172c3720 R14: ffff95d4172c3a98 R15: 00000000ffffffff [ 6.252437] FS: 0000000000000000(0000) GS:ffff95d41fc00000(0000) knlGS:0000000000000000 [ 6.253887] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.254814] CR2: 0000000000000228 CR3: 00000000172b4000 CR4: 00000000000006b0 [ 6.255943] Call Trace: [ 6.256368] remove_commit_idr_uobject+0x1b/0x80 [ 6.257118] uverbs_cleanup_ucontext+0xe4/0x190 [ 6.257855] ib_uverbs_cleanup_ucontext.constprop.14+0x19/0x40 [ 6.258857] ib_uverbs_close+0x2a/0x100 [ 6.259494] __fput+0xca/0x1c0 [ 6.259938] task_work_run+0x84/0xa0 [ 6.260519] do_exit+0x312/0xb40 [ 6.261023] ? __do_page_fault+0x24d/0x490 [ 6.261707] do_group_exit+0x3a/0xa0 [ 6.262267] SyS_exit_group+0x10/0x10 [ 6.262802] do_syscall_64+0x75/0x180 [ 6.263391] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 6.264253] RIP: 0033:0x7f1b39c49488 [ 6.264827] RSP: 002b:00007ffe2de05b68 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 [ 6.266049] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1b39c49488 [ 6.267187] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 [ 6.268377] RBP: 00007f1b39f258e0 R08: 00000000000000e7 R09: ffffffffffffff98 [ 6.269640] R10: 00007f1b3a147260 R11: 0000000000000246 R12: 00007f1b39f258e0 [ 6.270783] R13: 00007f1b39f2ac20 R14: 0000000000000000 R15: 0000000000000000 [ 6.271943] Code: 74 07 31 d2 e9 25 d8 6c 00 b8 da ff ff ff c3 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 07 53 48 8b 5f 08 <48> 8b 80 28 02 00 00 e8 f7 d7 6c 00 85 c0 75 04 3e ff 4b 18 5b [ 6.274927] RIP: ib_dereg_mr+0xd/0x30 RSP: ffffaf5d001d7d68 [ 6.275760] CR2: 0000000000000228 [ 6.276200] ---[ end trace a35641f1c474bd20 ]--- Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Cc: syzkaller Cc: Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1961c6a45437..c51c602f06d6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -838,7 +838,8 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, *umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); err = PTR_ERR_OR_ZERO(*umem); - if (err < 0) { + if (err) { + *umem = NULL; mlx5_ib_err(dev, "umem get failed (%d)\n", err); return err; } @@ -1415,6 +1416,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (err) { mlx5_ib_warn(dev, "Failed to rereg UMR\n"); ib_umem_release(mr->umem); + mr->umem = NULL; clean_mr(dev, mr); return err; } @@ -1498,14 +1500,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) u32 key = mr->mmkey.key; err = destroy_mkey(dev, mr); - kfree(mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", key, err); return err; } - } else { - mlx5_mr_cache_free(dev, mr); } return 0; @@ -1548,6 +1547,11 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) atomic_sub(npages, &dev->mdev->priv.reg_pages); } + if (!mr->allocated_from_cache) + kfree(mr); + else + mlx5_mr_cache_free(dev, mr); + return 0; } -- cgit v1.2.3 From 8c5c147339d2e201108169327b1f99aa6d57d2cd Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 14 Mar 2018 15:33:06 +0100 Subject: dm mpath: fix passing integrity data After v4.12 commit e2460f2a4bc7 ("dm: mark targets that pass integrity data"), dm-multipath, e.g. on DIF+DIX SCSI disk paths, does not support block integrity any more. So add it to the whitelist. This is also a pre-requisite to use block integrity with other dm layer(s) on top of multipath, such as kpartx partitions (dm-linear) or LVM. Also, bump target version to reflect this fix. Fixes: e2460f2a4bc7 ("dm: mark targets that pass integrity data") Cc: #4.12+ Bisected-by: Fedor Loshakov Signed-off-by: Steffen Maier Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2481c4794ed6..a05a560d3cba 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -2020,8 +2020,9 @@ static int multipath_busy(struct dm_target *ti) *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 12, 0}, - .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, + .version = {1, 13, 0}, + .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | + DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, -- cgit v1.2.3 From 9dea9a2ff61c5efb4d4937ae23b14babd25a5547 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 12 Mar 2018 17:14:02 -0500 Subject: RDMA/core: Do not use invalid destination in determining port reuse cma_port_is_unique() allows local port reuse if the quad (source address and port, destination address and port) for this connection is unique. However, if the destination info is zero or unspecified, it can't make a correct decision but still allows port reuse. For example, sometimes rdma_bind_addr() is called with unspecified destination and reusing the port can lead to creating a connection with a duplicate quad, after the destination is resolved. The issue manifests when MPI scale-up tests hang after the duplicate quad is used. Set the destination address family and add checks for zero destination address and port to prevent source port reuse based on invalid destination. Fixes: 19b752a19dce ("IB/cma: Allow port reuse for rdma_id") Reviewed-by: Sean Hefty Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e66963ca58bd..0e7b913254cc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3069,7 +3069,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, continue; /* different dest port -> unique */ - if (!cma_any_port(cur_daddr) && + if (!cma_any_port(daddr) && + !cma_any_port(cur_daddr) && (dport != cur_dport)) continue; @@ -3080,7 +3081,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, continue; /* different dst address -> unique */ - if (!cma_any_addr(cur_daddr) && + if (!cma_any_addr(daddr) && + !cma_any_addr(cur_daddr) && cma_addr_cmp(daddr, cur_daddr)) continue; @@ -3378,13 +3380,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } + daddr = cma_dst_addr(id_priv); + daddr->sa_family = addr->sa_family; + ret = cma_get_port(id_priv); if (ret) goto err2; - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; - return 0; err2: if (id_priv->cma_dev) -- cgit v1.2.3 From 7688f2c3bbf55e52388e37ac5d63ca471a7712e1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Mar 2018 11:43:23 +0200 Subject: RDMA/ucma: Fix access to non-initialized CM_ID object The attempt to join multicast group without ensuring that CMA device exists will lead to the following crash reported by syzkaller. [ 64.076794] BUG: KASAN: null-ptr-deref in rdma_join_multicast+0x26e/0x12c0 [ 64.076797] Read of size 8 at addr 00000000000000b0 by task join/691 [ 64.076797] [ 64.076800] CPU: 1 PID: 691 Comm: join Not tainted 4.16.0-rc1-00219-gb97853b65b93 #23 [ 64.076802] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-proj4 [ 64.076803] Call Trace: [ 64.076809] dump_stack+0x5c/0x77 [ 64.076817] kasan_report+0x163/0x380 [ 64.085859] ? rdma_join_multicast+0x26e/0x12c0 [ 64.086634] rdma_join_multicast+0x26e/0x12c0 [ 64.087370] ? rdma_disconnect+0xf0/0xf0 [ 64.088579] ? __radix_tree_replace+0xc3/0x110 [ 64.089132] ? node_tag_clear+0x81/0xb0 [ 64.089606] ? idr_alloc_u32+0x12e/0x1a0 [ 64.090517] ? __fprop_inc_percpu_max+0x150/0x150 [ 64.091768] ? tracing_record_taskinfo+0x10/0xc0 [ 64.092340] ? idr_alloc+0x76/0xc0 [ 64.092951] ? idr_alloc_u32+0x1a0/0x1a0 [ 64.093632] ? ucma_process_join+0x23d/0x460 [ 64.094510] ucma_process_join+0x23d/0x460 [ 64.095199] ? ucma_migrate_id+0x440/0x440 [ 64.095696] ? futex_wake+0x10b/0x2a0 [ 64.096159] ucma_join_multicast+0x88/0xe0 [ 64.096660] ? ucma_process_join+0x460/0x460 [ 64.097540] ? _copy_from_user+0x5e/0x90 [ 64.098017] ucma_write+0x174/0x1f0 [ 64.098640] ? ucma_resolve_route+0xf0/0xf0 [ 64.099343] ? rb_erase_cached+0x6c7/0x7f0 [ 64.099839] __vfs_write+0xc4/0x350 [ 64.100622] ? perf_syscall_enter+0xe4/0x5f0 [ 64.101335] ? kernel_read+0xa0/0xa0 [ 64.103525] ? perf_sched_cb_inc+0xc0/0xc0 [ 64.105510] ? syscall_exit_register+0x2a0/0x2a0 [ 64.107359] ? __switch_to+0x351/0x640 [ 64.109285] ? fsnotify+0x899/0x8f0 [ 64.111610] ? fsnotify_unmount_inodes+0x170/0x170 [ 64.113876] ? __fsnotify_update_child_dentry_flags+0x30/0x30 [ 64.115813] ? ring_buffer_record_is_on+0xd/0x20 [ 64.117824] ? __fget+0xa8/0xf0 [ 64.119869] vfs_write+0xf7/0x280 [ 64.122001] SyS_write+0xa1/0x120 [ 64.124213] ? SyS_read+0x120/0x120 [ 64.126644] ? SyS_read+0x120/0x120 [ 64.128563] do_syscall_64+0xeb/0x250 [ 64.130732] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 64.132984] RIP: 0033:0x7f5c994ade99 [ 64.135699] RSP: 002b:00007f5c99b97d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 64.138740] RAX: ffffffffffffffda RBX: 00000000200001e4 RCX: 00007f5c994ade99 [ 64.141056] RDX: 00000000000000a0 RSI: 00000000200001c0 RDI: 0000000000000015 [ 64.143536] RBP: 00007f5c99b97ec0 R08: 0000000000000000 R09: 0000000000000000 [ 64.146017] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f5c99b97fc0 [ 64.148608] R13: 0000000000000000 R14: 00007fff660e1c40 R15: 00007f5c99b989c0 [ 64.151060] [ 64.153703] Disabling lock debugging due to kernel taint [ 64.156032] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0 [ 64.159066] IP: rdma_join_multicast+0x26e/0x12c0 [ 64.161451] PGD 80000001d0298067 P4D 80000001d0298067 PUD 1dea39067 PMD 0 [ 64.164442] Oops: 0000 [#1] SMP KASAN PTI [ 64.166817] CPU: 1 PID: 691 Comm: join Tainted: G B 4.16.0-rc1-00219-gb97853b65b93 #23 [ 64.170004] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-proj4 [ 64.174985] RIP: 0010:rdma_join_multicast+0x26e/0x12c0 [ 64.177246] RSP: 0018:ffff8801c8207860 EFLAGS: 00010282 [ 64.179901] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff94789522 [ 64.183344] RDX: 1ffffffff2d50fa5 RSI: 0000000000000297 RDI: 0000000000000297 [ 64.186237] RBP: ffff8801c8207a50 R08: 0000000000000000 R09: ffffed0039040ea7 [ 64.189328] R10: 0000000000000001 R11: ffffed0039040ea6 R12: 0000000000000000 [ 64.192634] R13: 0000000000000000 R14: ffff8801e2022800 R15: ffff8801d4ac2400 [ 64.196105] FS: 00007f5c99b98700(0000) GS:ffff8801e5d00000(0000) knlGS:0000000000000000 [ 64.199211] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 64.202046] CR2: 00000000000000b0 CR3: 00000001d1c48004 CR4: 00000000003606a0 [ 64.205032] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 64.208221] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 64.211554] Call Trace: [ 64.213464] ? rdma_disconnect+0xf0/0xf0 [ 64.216124] ? __radix_tree_replace+0xc3/0x110 [ 64.219337] ? node_tag_clear+0x81/0xb0 [ 64.222140] ? idr_alloc_u32+0x12e/0x1a0 [ 64.224422] ? __fprop_inc_percpu_max+0x150/0x150 [ 64.226588] ? tracing_record_taskinfo+0x10/0xc0 [ 64.229763] ? idr_alloc+0x76/0xc0 [ 64.232186] ? idr_alloc_u32+0x1a0/0x1a0 [ 64.234505] ? ucma_process_join+0x23d/0x460 [ 64.237024] ucma_process_join+0x23d/0x460 [ 64.240076] ? ucma_migrate_id+0x440/0x440 [ 64.243284] ? futex_wake+0x10b/0x2a0 [ 64.245302] ucma_join_multicast+0x88/0xe0 [ 64.247783] ? ucma_process_join+0x460/0x460 [ 64.250841] ? _copy_from_user+0x5e/0x90 [ 64.253878] ucma_write+0x174/0x1f0 [ 64.257008] ? ucma_resolve_route+0xf0/0xf0 [ 64.259877] ? rb_erase_cached+0x6c7/0x7f0 [ 64.262746] __vfs_write+0xc4/0x350 [ 64.265537] ? perf_syscall_enter+0xe4/0x5f0 [ 64.267792] ? kernel_read+0xa0/0xa0 [ 64.270358] ? perf_sched_cb_inc+0xc0/0xc0 [ 64.272575] ? syscall_exit_register+0x2a0/0x2a0 [ 64.275367] ? __switch_to+0x351/0x640 [ 64.277700] ? fsnotify+0x899/0x8f0 [ 64.280530] ? fsnotify_unmount_inodes+0x170/0x170 [ 64.283156] ? __fsnotify_update_child_dentry_flags+0x30/0x30 [ 64.286182] ? ring_buffer_record_is_on+0xd/0x20 [ 64.288749] ? __fget+0xa8/0xf0 [ 64.291136] vfs_write+0xf7/0x280 [ 64.292972] SyS_write+0xa1/0x120 [ 64.294965] ? SyS_read+0x120/0x120 [ 64.297474] ? SyS_read+0x120/0x120 [ 64.299751] do_syscall_64+0xeb/0x250 [ 64.301826] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 64.304352] RIP: 0033:0x7f5c994ade99 [ 64.306711] RSP: 002b:00007f5c99b97d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 64.309577] RAX: ffffffffffffffda RBX: 00000000200001e4 RCX: 00007f5c994ade99 [ 64.312334] RDX: 00000000000000a0 RSI: 00000000200001c0 RDI: 0000000000000015 [ 64.315783] RBP: 00007f5c99b97ec0 R08: 0000000000000000 R09: 0000000000000000 [ 64.318365] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f5c99b97fc0 [ 64.320980] R13: 0000000000000000 R14: 00007fff660e1c40 R15: 00007f5c99b989c0 [ 64.323515] Code: e8 e8 79 08 ff 4c 89 ff 45 0f b6 a7 b8 01 00 00 e8 68 7c 08 ff 49 8b 1f 4d 89 e5 49 c1 e4 04 48 8 [ 64.330753] RIP: rdma_join_multicast+0x26e/0x12c0 RSP: ffff8801c8207860 [ 64.332979] CR2: 00000000000000b0 [ 64.335550] ---[ end trace 0c00c17a408849c1 ]--- Reported-by: Fixes: c8f6a362bf3e ("RDMA/cma: Add multicast communication support") Signed-off-by: Leon Romanovsky Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e7b913254cc..a5367c5efbe7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4175,6 +4175,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, struct cma_multicast *mc; int ret; + if (!id->device) + return -EINVAL; + id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) -- cgit v1.2.3 From 0c81ffc60d5280991773d17e84bda605387148b1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Mar 2018 18:37:27 +0200 Subject: RDMA/ucma: Don't allow join attempts for unsupported AF family Users can provide garbage while calling to ucma_join_ip_multicast(), it will indirectly cause to rdma_addr_size() return 0, making the call to ucma_process_join(), which had the right checks, but it is better to check the input as early as possible. The following crash from syzkaller revealed it. kernel BUG at lib/string.c:1052! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4113 Comm: syz-executor0 Not tainted 4.16.0-rc5+ #261 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:fortify_panic+0x13/0x20 lib/string.c:1051 RSP: 0018:ffff8801ca81f8f0 EFLAGS: 00010286 RAX: 0000000000000022 RBX: 1ffff10039503f23 RCX: 0000000000000000 RDX: 0000000000000022 RSI: 1ffff10039503ed3 RDI: ffffed0039503f12 RBP: ffff8801ca81f8f0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000006 R11: 0000000000000000 R12: ffff8801ca81f998 R13: ffff8801ca81f938 R14: ffff8801ca81fa58 R15: 000000000000fa00 FS: 0000000000000000(0000) GS:ffff8801db200000(0063) knlGS:000000000a12a900 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000008138024 CR3: 00000001cbb58004 CR4: 00000000001606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: memcpy include/linux/string.h:344 [inline] ucma_join_ip_multicast+0x36b/0x3b0 drivers/infiniband/core/ucma.c:1421 ucma_write+0x2d6/0x3d0 drivers/infiniband/core/ucma.c:1633 __vfs_write+0xef/0x970 fs/read_write.c:480 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 do_syscall_32_irqs_on arch/x86/entry/common.c:330 [inline] do_fast_syscall_32+0x3ec/0xf9f arch/x86/entry/common.c:392 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f9ec99 RSP: 002b:00000000ff8172cc EFLAGS: 00000282 ORIG_RAX: 0000000000000004 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000020000100 RDX: 0000000000000063 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Code: 08 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b 48 89 df e8 42 2c e3 fb eb de 55 48 89 fe 48 c7 c7 80 75 98 86 48 89 e5 e8 85 95 94 fb <0f> 0b 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 56 RIP: fortify_panic+0x13/0x20 lib/string.c:1051 RSP: ffff8801ca81f8f0 Fixes: 5bc2b7b397b0 ("RDMA/ucma: Allow user space to specify AF_IB when joining multicast") Reported-by: Signed-off-by: Leon Romanovsky Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3a9d0f5b5881..699a46d4c0c5 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1349,7 +1349,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; - if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + if (cmd->addr_size != rdma_addr_size(addr)) return -EINVAL; if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) @@ -1417,6 +1417,9 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file, join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); + if (!join_cmd.addr_size) + return -EINVAL; + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); @@ -1432,6 +1435,9 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (!rdma_addr_size((struct sockaddr *)&cmd.addr)) + return -EINVAL; + return ucma_process_join(file, &cmd, out_len); } -- cgit v1.2.3 From 342038d92403b3efa1138a8599666b9f026279d6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 9 Mar 2018 14:42:54 +0100 Subject: drm/amdgpu: fix prime teardown order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We unmapped imported DMA-bufs when the GEM handle was dropped, not when the hardware was done with the buffere. Signed-off-by: Christian König Reviewed-by: Michel Dänzer CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e48b4ec88c8c..ca6c931dabfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); if (robj) { - if (robj->gem_base.import_attach) - drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); amdgpu_mn_unregister(robj); amdgpu_bo_unref(&robj); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5c4c3e0d527b..1220322c1680 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -56,6 +56,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_kunmap(bo); + if (bo->gem_base.import_attach) + drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); if (!list_empty(&bo->shadow_list)) { -- cgit v1.2.3 From 0f4f715bc6bed3bf14c5cd7d5fe88d443e756b14 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 9 Mar 2018 14:44:32 +0100 Subject: drm/radeon: fix prime teardown order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We unmapped imported DMA-bufs when the GEM handle was dropped, not when the hardware was done with the buffere. Signed-off-by: Christian König Reviewed-by: Michel Dänzer CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 -- drivers/gpu/drm/radeon/radeon_object.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a9962ffba720..27d8e7dd2d06 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -34,8 +34,6 @@ void radeon_gem_object_free(struct drm_gem_object *gobj) struct radeon_bo *robj = gem_to_radeon_bo(gobj); if (robj) { - if (robj->gem_base.import_attach) - drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); radeon_mn_unregister(robj); radeon_bo_unref(&robj); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 15404af9d740..31f5ad605e59 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -82,6 +82,8 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) mutex_unlock(&bo->rdev->gem.mutex); radeon_bo_clear_surface_reg(bo); WARN_ON_ONCE(!list_empty(&bo->va)); + if (bo->gem_base.import_attach) + drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); drm_gem_object_release(&bo->gem_base); kfree(bo); } -- cgit v1.2.3 From b5e3241316973b9f01228d2fa5b8b2bb157d44aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 Mar 2018 09:56:01 -0500 Subject: drm/amdgpu: save/restore backlight level in legacy dce code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save/restore the backlight level scratch register in S3/S4 so the backlight level comes back at the previously requested level. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=199047 Fixes: 4ec6ecf48c64d (drm/amdgpu: drop scratch regs save and restore from S3/S4 handling) Acked-by: Michel Dänzer Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/atombios_encoders.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 8 ++++++++ 7 files changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 54f06c959340..2264c5c97009 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -352,6 +352,7 @@ struct amdgpu_mode_info { u16 firmware_flags; /* pointer to backlight encoder */ struct amdgpu_encoder *bl_encoder; + u8 bl_level; /* saved backlight level */ struct amdgpu_audio audio; /* audio stuff */ int num_crtc; /* number of crtcs */ int num_hpd; /* number of hpd pins */ diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 2af26d2da127..d702fb8e3427 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -34,7 +34,7 @@ #include #include "bif/bif_4_1_d.h" -static u8 +u8 amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) { u8 backlight_level; @@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) return backlight_level; } -static void +void amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, u8 backlight_level) { diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h index 2bdec40515ce..f77cbdef679e 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h @@ -24,6 +24,11 @@ #ifndef __ATOMBIOS_ENCODER_H__ #define __ATOMBIOS_ENCODER_H__ +u8 +amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev); +void +amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, + u8 backlight_level); u8 amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder); void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index f34bc68aadfb..022f303463fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2921,6 +2921,11 @@ static int dce_v10_0_hw_fini(void *handle) static int dce_v10_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v10_0_hw_fini(handle); } @@ -2929,6 +2934,9 @@ static int dce_v10_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v10_0_hw_init(handle); /* turn on the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 26378bd6aba4..800a9f36ab4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3047,6 +3047,11 @@ static int dce_v11_0_hw_fini(void *handle) static int dce_v11_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v11_0_hw_fini(handle); } @@ -3055,6 +3060,9 @@ static int dce_v11_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v11_0_hw_init(handle); /* turn on the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index a712f4b285f6..b8368f69ce1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2787,6 +2787,11 @@ static int dce_v6_0_hw_fini(void *handle) static int dce_v6_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v6_0_hw_fini(handle); } @@ -2795,6 +2800,9 @@ static int dce_v6_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v6_0_hw_init(handle); /* turn on the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index c008dc030687..012e0a9ae0ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2819,6 +2819,11 @@ static int dce_v8_0_hw_fini(void *handle) static int dce_v8_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v8_0_hw_fini(handle); } @@ -2827,6 +2832,9 @@ static int dce_v8_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v8_0_hw_init(handle); /* turn on the BL */ -- cgit v1.2.3 From 7d617264eb22b18d979eac6e85877a141253034e Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 9 Mar 2018 18:26:18 +0100 Subject: drm/amdgpu/dce: Don't turn off DP sink when disconnected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turning off the sink in this case causes various issues, because userspace expects it to stay on until it turns it off explicitly. Instead, turn the sink off and back on when a display is connected again. This dance seems necessary for link training to work correctly. Bugzilla: https://bugs.freedesktop.org/105308 Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 31 ++++++++++---------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 74d2efaec52f..7a073ac5f9c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) /* don't do anything if sink is not display port, i.e., * passive dp->(dvi|hdmi) adaptor */ - if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { - int saved_dpms = connector->dpms; - /* Only turn off the display if it's physically disconnected */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { - /* Don't try to start link training before we - * have the dpcd */ - if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) - return; - - /* set it to OFF so that drm_helper_connector_dpms() - * won't return immediately since the current state - * is ON at this point. - */ - connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } - connector->dpms = saved_dpms; + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && + amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) && + amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { + /* Don't start link training before we have the DPCD */ + if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + return; + + /* Turn the connector off and back on immediately, which + * will trigger link training + */ + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } } } -- cgit v1.2.3 From 42cea83f952499f31e2671c4917be8627617db81 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 14 Mar 2018 09:14:15 +0200 Subject: IB/mlx5: Fix cleanup order on unload On load we create private CQ/QP/PD in order to be used by UMR, we create those resources after we register ourself as an IB device, and we destroy them after we unregister as an IB device. This was changed by commit 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") which moved the destruction before we unregistration. This allowed to trigger an invalid memory access when unloading mlx5_ib while there are open resources: BUG: unable to handle kernel paging request at 00000001002c012c ... Call Trace: mlx5_ib_post_send_wait+0x75/0x110 [mlx5_ib] __slab_free+0x9a/0x2d0 delay_time_func+0x10/0x10 [mlx5_ib] unreg_umr.isra.15+0x4b/0x50 [mlx5_ib] mlx5_mr_cache_free+0x46/0x150 [mlx5_ib] clean_mr+0xc9/0x190 [mlx5_ib] dereg_mr+0xba/0xf0 [mlx5_ib] ib_dereg_mr+0x13/0x20 [ib_core] remove_commit_idr_uobject+0x16/0x70 [ib_uverbs] uverbs_cleanup_ucontext+0xe8/0x1a0 [ib_uverbs] ib_uverbs_cleanup_ucontext.isra.9+0x19/0x40 [ib_uverbs] ib_uverbs_remove_one+0x162/0x2e0 [ib_uverbs] ib_unregister_device+0xd4/0x190 [ib_core] __mlx5_ib_remove+0x2e/0x40 [mlx5_ib] mlx5_remove_device+0xf5/0x120 [mlx5_core] mlx5_unregister_interface+0x37/0x90 [mlx5_core] mlx5_ib_cleanup+0xc/0x225 [mlx5_ib] SyS_delete_module+0x153/0x230 do_syscall_64+0x62/0x110 entry_SYSCALL_64_after_hwframe+0x21/0x86 ... We restore the original behavior by breaking the UMR stage into two parts, pre and post IB registration stages, this way we can restore the original functionality and maintain clean separation of logic between stages. Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 21 ++++++++++++--------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 033b6af90de9..da091de4e69d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4860,19 +4860,19 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, NULL); } -static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - ib_unregister_device(&dev->ib_dev); + destroy_umrc_res(dev); } -static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) { - return create_umr_res(dev); + ib_unregister_device(&dev->ib_dev); } -static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - destroy_umrc_res(dev); + return create_umr_res(dev); } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) @@ -4982,12 +4982,15 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_BFREG, mlx5_ib_stage_bfrag_init, mlx5_ib_stage_bfrag_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, + NULL, + mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES, - mlx5_ib_stage_umr_res_init, - mlx5_ib_stage_umr_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, + mlx5_ib_stage_post_ib_reg_umr_init, + NULL), STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 139385129973..a5272499b600 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -739,8 +739,9 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CONG_DEBUGFS, MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, + MLX5_IB_STAGE_PRE_IB_REG_UMR, MLX5_IB_STAGE_IB_REG, - MLX5_IB_STAGE_UMR_RESOURCES, + MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, MLX5_IB_STAGE_CLASS_ATTR, MLX5_IB_STAGE_MAX, -- cgit v1.2.3 From 047fdea6341966a0898e3b16c51f54d4f5ba030a Mon Sep 17 00:00:00 2001 From: Dmitriy Gorokh Date: Fri, 16 Feb 2018 19:51:38 +0000 Subject: btrfs: Fix NULL pointer exception in find_bio_stripe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On detaching of a disk which is a part of a RAID6 filesystem, the following kernel OOPS may happen: [63122.680461] BTRFS error (device sdo): bdev /dev/sdo errs: wr 0, rd 0, flush 1, corrupt 0, gen 0 [63122.719584] BTRFS warning (device sdo): lost page write due to IO error on /dev/sdo [63122.719587] BTRFS error (device sdo): bdev /dev/sdo errs: wr 1, rd 0, flush 1, corrupt 0, gen 0 [63122.803516] BTRFS warning (device sdo): lost page write due to IO error on /dev/sdo [63122.803519] BTRFS error (device sdo): bdev /dev/sdo errs: wr 2, rd 0, flush 1, corrupt 0, gen 0 [63122.863902] BTRFS critical (device sdo): fatal error on device /dev/sdo [63122.935338] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [63122.946554] IP: fail_bio_stripe+0x58/0xa0 [btrfs] [63122.958185] PGD 9ecda067 P4D 9ecda067 PUD b2b37067 PMD 0 [63122.971202] Oops: 0000 [#1] SMP [63123.006760] CPU: 0 PID: 3979 Comm: kworker/u8:9 Tainted: G W 4.14.2-16-scst34x+ #8 [63123.007091] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [63123.007402] Workqueue: btrfs-worker btrfs_worker_helper [btrfs] [63123.007595] task: ffff880036ea4040 task.stack: ffffc90006384000 [63123.007796] RIP: 0010:fail_bio_stripe+0x58/0xa0 [btrfs] [63123.007968] RSP: 0018:ffffc90006387ad8 EFLAGS: 00010287 [63123.008140] RAX: 0000000000000002 RBX: ffff88004beaa0b8 RCX: ffff8800b2bd5690 [63123.008359] RDX: 0000000000000000 RSI: ffff88007bb43500 RDI: ffff88004beaa000 [63123.008621] RBP: ffffc90006387ae8 R08: 0000000099100000 R09: ffff8800b2bd5600 [63123.008840] R10: 0000000000000004 R11: 0000000000010000 R12: ffff88007bb43500 [63123.009059] R13: 00000000fffffffb R14: ffff880036fc5180 R15: 0000000000000004 [63123.009278] FS: 0000000000000000(0000) GS:ffff8800b7000000(0000) knlGS:0000000000000000 [63123.009564] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [63123.009748] CR2: 0000000000000080 CR3: 00000000b0866000 CR4: 00000000000406f0 [63123.009969] Call Trace: [63123.010085] raid_write_end_io+0x7e/0x80 [btrfs] [63123.010251] bio_endio+0xa1/0x120 [63123.010378] generic_make_request+0x218/0x270 [63123.010921] submit_bio+0x66/0x130 [63123.011073] finish_rmw+0x3fc/0x5b0 [btrfs] [63123.011245] full_stripe_write+0x96/0xc0 [btrfs] [63123.011428] raid56_parity_write+0x117/0x170 [btrfs] [63123.011604] btrfs_map_bio+0x2ec/0x320 [btrfs] [63123.011759] ? ___cache_free+0x1c5/0x300 [63123.011909] __btrfs_submit_bio_done+0x26/0x50 [btrfs] [63123.012087] run_one_async_done+0x9c/0xc0 [btrfs] [63123.012257] normal_work_helper+0x19e/0x300 [btrfs] [63123.012429] btrfs_worker_helper+0x12/0x20 [btrfs] [63123.012656] process_one_work+0x14d/0x350 [63123.012888] worker_thread+0x4d/0x3a0 [63123.013026] ? _raw_spin_unlock_irqrestore+0x15/0x20 [63123.013192] kthread+0x109/0x140 [63123.013315] ? process_scheduled_works+0x40/0x40 [63123.013472] ? kthread_stop+0x110/0x110 [63123.013610] ret_from_fork+0x25/0x30 [63123.014469] RIP: fail_bio_stripe+0x58/0xa0 [btrfs] RSP: ffffc90006387ad8 [63123.014678] CR2: 0000000000000080 [63123.016590] ---[ end trace a295ea7259c17880 ]— This is reproducible in a cycle, where a series of writes is followed by SCSI device delete command. The test may take up to few minutes. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") [ no signed-off-by provided ] Author: Dmitriy Gorokh Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index dec0907dfb8a..fcfc20de2df3 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, stripe_start = stripe->physical; if (physical >= stripe_start && physical < stripe_start + rbio->stripe_len && + stripe->dev->bdev && bio->bi_disk == stripe->dev->bdev->bd_disk && bio->bi_partno == stripe->dev->bdev->bd_partno) { return i; -- cgit v1.2.3 From 18bf591ba9753e3e5ba91f38f756a800693408f4 Mon Sep 17 00:00:00 2001 From: Edmund Nadolski Date: Wed, 14 Mar 2018 09:03:11 -0600 Subject: btrfs: add missing initialization in btrfs_check_shared This patch addresses an issue that causes fiemap to falsely report a shared extent. The test case is as follows: xfs_io -f -d -c "pwrite -b 16k 0 64k" -c "fiemap -v" /media/scratch/file5 sync xfs_io -c "fiemap -v" /media/scratch/file5 which gives the resulting output: wrote 65536/65536 bytes at offset 0 64 KiB, 4 ops; 0.0000 sec (121.359 MiB/sec and 7766.9903 ops/sec) /media/scratch/file5: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 24576..24703 128 0x2001 /media/scratch/file5: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 24576..24703 128 0x1 This is because btrfs_check_shared calls find_parent_nodes repeatedly in a loop, passing a share_check struct to report the count of shared extent. But btrfs_check_shared does not re-initialize the count value to zero for subsequent calls from the loop, resulting in a false share count value. This is a regressive behavior from 4.13. With proper re-initialization the test result is as follows: wrote 65536/65536 bytes at offset 0 64 KiB, 4 ops; 0.0000 sec (110.035 MiB/sec and 7042.2535 ops/sec) /media/scratch/file5: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 24576..24703 128 0x1 /media/scratch/file5: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 24576..24703 128 0x1 which corrects the regression. Fixes: 3ec4d3238ab ("btrfs: allow backref search checks for shared extents") Signed-off-by: Edmund Nadolski [ add text from cover letter to changelog ] Signed-off-by: David Sterba --- fs/btrfs/backref.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f94b2d8c744a..26484648d090 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1519,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) if (!node) break; bytenr = node->val; + shared.share_count = 0; cond_resched(); } -- cgit v1.2.3 From a7e6c7015bf3e0cb467a2f6c0e1de985ee1a0ecb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Mar 2018 21:36:22 -0700 Subject: x86, memremap: fix altmap accounting at free Commit 24b6d4164348 "mm: pass the vmem_altmap to vmemmap_free" converted the vmemmap_free() path to pass the altmap argument all the way through the call chain rather than looking it up based on the page. Unfortunately that ends up over freeing altmap allocated pages in some cases since free_pagetable() is used to free both memmap space and pte space, where only the memmap stored in huge pages uses altmap allocations. Given that altmap allocations for memmap space are special cased in vmemmap_populate_hugepages() add a symmetric / special case free_hugepage_table() to handle altmap freeing, and cleanup the unneeded passing of altmap to leaf functions that do not require it. Without this change the sanity check accounting in devm_memremap_pages_release() will throw a warning with the following signature. nd_pmem pfn10.1: devm_memremap_pages_release: failed to free all reserved pages WARNING: CPU: 44 PID: 3539 at kernel/memremap.c:310 devm_memremap_pages_release+0x1c7/0x220 CPU: 44 PID: 3539 Comm: ndctl Tainted: G L 4.16.0-rc1-linux-stable #7 RIP: 0010:devm_memremap_pages_release+0x1c7/0x220 [..] Call Trace: release_nodes+0x225/0x270 device_release_driver_internal+0x15d/0x210 bus_remove_device+0xe2/0x160 device_del+0x130/0x310 ? klist_release+0x56/0x100 ? nd_region_notify+0xc0/0xc0 [libnvdimm] device_unregister+0x16/0x60 This was missed in testing since not all configurations will trigger this warning. Fixes: 24b6d4164348 ("mm: pass the vmem_altmap to vmemmap_free") Reported-by: Jane Chu Cc: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/x86/mm/init_64.c | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8b72923f1d35..af11a2890235 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order, - struct vmem_altmap *altmap) +static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; - if (altmap) { - vmem_altmap_free(altmap, nr_pages); - return; - } - /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); @@ -826,8 +820,16 @@ static void __meminit free_pagetable(struct page *page, int order, free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, +static void __meminit free_hugepage_table(struct page *page, struct vmem_altmap *altmap) +{ + if (altmap) + vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); + else + free_pagetable(page, get_order(PMD_SIZE)); +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) { pte_t *pte; int i; @@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0, altmap); + free_pagetable(pmd_page(*pmd), 0); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, - struct vmem_altmap *altmap) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) { pmd_t *pmd; int i; @@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0, altmap); + free_pagetable(pud_page(*pud), 0); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, - struct vmem_altmap *altmap) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) { pud_t *pud; int i; @@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0, altmap); + free_pagetable(p4d_page(*p4d), 0); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - struct vmem_altmap *altmap, bool direct) + bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, altmap, direct); - free_pte_table(pte_base, pmd, altmap); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, pmd_base = pmd_offset(pud, 0); remove_pmd_table(pmd_base, addr, next, direct, altmap); - free_pmd_table(pmd_base, pud, altmap); + free_pmd_table(pmd_base, pud); } if (direct) @@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d, altmap); + free_pud_table(pud_base, p4d); } if (direct) -- cgit v1.2.3 From 5388a508479d8b7e6d24fe5ca2645806d8e3d0f1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 21:56:26 +0100 Subject: infiniband: qplib_fp: fix pointer cast Building for a 32-bit target results in a couple of warnings from casting between a 32-bit pointer and a 64-bit integer: drivers/infiniband/hw/bnxt_re/qplib_fp.c: In function 'bnxt_qplib_service_nq': drivers/infiniband/hw/bnxt_re/qplib_fp.c:333:23: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle, ^ drivers/infiniband/hw/bnxt_re/qplib_fp.c:336:12: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] (struct bnxt_qplib_srq *)q_handle, ^ In file included from include/linux/byteorder/little_endian.h:5, from arch/arm/include/uapi/asm/byteorder.h:22, from include/asm-generic/bitops/le.h:6, from arch/arm/include/asm/bitops.h:342, from include/linux/bitops.h:38, from include/linux/kernel.h:11, from include/linux/interrupt.h:6, from drivers/infiniband/hw/bnxt_re/qplib_fp.c:39: drivers/infiniband/hw/bnxt_re/qplib_fp.c: In function 'bnxt_qplib_create_srq': include/uapi/linux/byteorder/little_endian.h:31:43: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] #define __cpu_to_le64(x) ((__force __le64)(__u64)(x)) ^ include/linux/byteorder/generic.h:86:21: note: in expansion of macro '__cpu_to_le64' #define cpu_to_le64 __cpu_to_le64 ^~~~~~~~~~~~~ drivers/infiniband/hw/bnxt_re/qplib_fp.c:569:19: note: in expansion of macro 'cpu_to_le64' req.srq_handle = cpu_to_le64(srq); Using a uintptr_t as an intermediate works on all architectures. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 06b42c880fd4..3a78faba8d91 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -243,7 +243,7 @@ static void bnxt_qplib_service_nq(unsigned long data) u32 sw_cons, raw_cons; u16 type; int budget = nq->budget; - u64 q_handle; + uintptr_t q_handle; /* Service the NQ until empty */ raw_cons = hwq->cons; @@ -526,7 +526,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, /* Configure the request */ req.dpi = cpu_to_le32(srq->dpi->dpi); - req.srq_handle = cpu_to_le64(srq); + req.srq_handle = cpu_to_le64((uintptr_t)srq); req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements); pbl = &srq->hwq.pbl[PBL_LVL_0]; -- cgit v1.2.3 From bd8602ca42f6d8b25392a8f2a0e7be34e1206618 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 21:56:27 +0100 Subject: infiniband: bnxt_re: use BIT_ULL() for 64-bit bit masks On 32-bit targets, we otherwise get a warning about an impossible constant integer expression: In file included from include/linux/kernel.h:11, from include/linux/interrupt.h:6, from drivers/infiniband/hw/bnxt_re/ib_verbs.c:39: drivers/infiniband/hw/bnxt_re/ib_verbs.c: In function 'bnxt_re_query_device': include/linux/bitops.h:7:24: error: left shift count >= width of type [-Werror=shift-count-overflow] #define BIT(nr) (1UL << (nr)) ^~ drivers/infiniband/hw/bnxt_re/bnxt_re.h:61:34: note: in expansion of macro 'BIT' #define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39) ^~~ drivers/infiniband/hw/bnxt_re/bnxt_re.h:62:30: note: in expansion of macro 'BNXT_RE_MAX_MR_SIZE_HIGH' #define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/bnxt_re/ib_verbs.c:149:25: note: in expansion of macro 'BNXT_RE_MAX_MR_SIZE' ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; ^~~~~~~~~~~~~~~~~~~ Fixes: 872f3578241d ("RDMA/bnxt_re: Add support for MRs with Huge pages") Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 4 ++-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 3eb7a8387116..96f76896488d 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -57,8 +57,8 @@ #define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M) #define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G) -#define BNXT_RE_MAX_MR_SIZE_LOW BIT(BNXT_RE_PAGE_SHIFT_1G) -#define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39) +#define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G) +#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) #define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH #define BNXT_RE_MAX_QPC_COUNT (64 * 1024) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0dd75f449872..8301d7e5fa8c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3598,7 +3598,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, int umem_pgs, page_shift, rc; if (length > BNXT_RE_MAX_MR_SIZE) { - dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n", + dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n", length, BNXT_RE_MAX_MR_SIZE); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 3e04040df6d4613a8af5a80882d5f7f298f49810 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Mar 2018 19:29:37 +0000 Subject: Revert "mm/page_alloc: fix memmap_init_zone pageblock alignment" This reverts commit 864b75f9d6b0100bb24fdd9a20d156e7cda9b5ae. Commit 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock alignment") modified the logic in memmap_init_zone() to initialize struct pages associated with invalid PFNs, to appease a VM_BUG_ON() in move_freepages(), which is redundant by its own admission, and dereferences struct page fields to obtain the zone without checking whether the struct pages in question are valid to begin with. Commit 864b75f9d6b0 only makes it worse, since the rounding it does may cause pfn assume the same value it had in a prior iteration of the loop, resulting in an infinite loop and a hang very early in the boot. Also, since it doesn't perform the same rounding on start_pfn itself but only on intermediate values following an invalid PFN, we may still hit the same VM_BUG_ON() as before. So instead, let's fix this at the core, and ensure that the BUG check doesn't dereference struct page fields of invalid pages. Fixes: 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock alignment") Tested-by: Jan Glauber Tested-by: Shanker Donthineni Cc: Daniel Vacek Cc: Mel Gorman Cc: Michal Hocko Cc: Paul Burton Cc: Pavel Tatashin Cc: Vlastimil Babka Cc: Andrew Morton Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3d974cb2a1a1..635d7dd29d7f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone, * Remove at a later date when no bug reports exist related to * grouping pages by mobility */ - VM_BUG_ON(page_zone(start_page) != page_zone(end_page)); + VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) && + pfn_valid(page_to_pfn(end_page)) && + page_zone(start_page) != page_zone(end_page)); #endif if (num_movable) @@ -5359,14 +5361,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, /* * Skip to the pfn preceding the next valid one (or * end_pfn), such that we hit a valid pfn (or end_pfn) - * on our next iteration of the loop. Note that it needs - * to be pageblock aligned even when the region itself - * is not. move_freepages_block() can shift ahead of - * the valid region but still depends on correct page - * metadata. + * on our next iteration of the loop. */ - pfn = (memblock_next_valid_pfn(pfn, end_pfn) & - ~(pageblock_nr_pages-1)) - 1; + pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; #endif continue; } -- cgit v1.2.3 From 8df3aaaf9b5f8bdfc4036695fa22f35b45b4d92f Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 13 Mar 2018 11:36:49 -0700 Subject: btree: avoid variable-length allocations geo->keylen cannot be larger than 4. So we might as well make fixed-size allocations. Given the one remaining user, geo->keylen cannot even be larger than 1. Logfs used to have 64bit and 128bit keys, tcm_qla2xxx only has 32bit keys. But let's not break the code if we don't have to. Signed-off-by: Joern Engel Signed-off-by: Linus Torvalds --- lib/btree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/btree.c b/lib/btree.c index f93a945274af..590facba2c50 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -3,7 +3,7 @@ * * As should be obvious for Linux kernel code, license is GPLv2 * - * Copyright (c) 2007-2008 Joern Engel + * Copyright (c) 2007-2008 Joern Engel * Bits and pieces stolen from Peter Zijlstra's code, which is * Copyright 2007, Red Hat Inc. Peter Zijlstra * GPLv2 @@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = { }; EXPORT_SYMBOL_GPL(btree_geo128); +#define MAX_KEYLEN (2 * LONG_PER_U64) + static struct kmem_cache *btree_cachep; void *btree_alloc(gfp_t gfp_mask, void *pool_data) @@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, { int i, height; unsigned long *node, *oldnode; - unsigned long *retry_key = NULL, key[geo->keylen]; + unsigned long *retry_key = NULL, key[MAX_KEYLEN]; if (keyzero(geo, __key)) return NULL; @@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove); int btree_merge(struct btree_head *target, struct btree_head *victim, struct btree_geo *geo, gfp_t gfp) { - unsigned long key[geo->keylen]; - unsigned long dup[geo->keylen]; + unsigned long key[MAX_KEYLEN]; + unsigned long dup[MAX_KEYLEN]; void *val; int err; -- cgit v1.2.3 From 8b834bff1b73dce46f4e9f5e84af6f73fed8b0ef Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:39 +0800 Subject: scsi: hpsa: fix selection of reply queue Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") we could end up with an MSI-X vector that did not have any online CPUs mapped. This would lead to I/O hangs since there was no CPU to receive the completion. Retrieve IRQ affinity information using pci_irq_get_affinity() and use this mapping to choose a reply queue. [mkp: tweaked commit desc] Cc: Hannes Reinecke Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Laurence Oberman Cc: Meelis Roos Cc: Artem Bityutskiy Cc: Mike Snitzer Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Tested-by: Laurence Oberman Tested-by: Don Brace Tested-by: Artem Bityutskiy Acked-by: Don Brace Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 73 +++++++++++++++++++++++++++++++++++++++-------------- drivers/scsi/hpsa.h | 1 + 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 87b260e403ec..31423b6dc26d 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -1045,11 +1045,7 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c, c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); if (unlikely(!h->msix_vectors)) return; - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - c->Header.ReplyQueue = - raw_smp_processor_id() % h->nreply_queues; - else - c->Header.ReplyQueue = reply_queue % h->nreply_queues; + c->Header.ReplyQueue = reply_queue; } } @@ -1063,10 +1059,7 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h, * Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->ReplyQueue = smp_processor_id() % h->nreply_queues; - else - cp->ReplyQueue = reply_queue % h->nreply_queues; + cp->ReplyQueue = reply_queue; /* * Set the bits in the address sent down to include: * - performant mode bit (bit 0) @@ -1087,10 +1080,7 @@ static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h, /* Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->reply_queue = smp_processor_id() % h->nreply_queues; - else - cp->reply_queue = reply_queue % h->nreply_queues; + cp->reply_queue = reply_queue; /* Set the bits in the address sent down to include: * - performant mode bit not used in ioaccel mode 2 * - pull count (bits 0-3) @@ -1109,10 +1099,7 @@ static void set_ioaccel2_performant_mode(struct ctlr_info *h, * Tell the controller to post the reply to the queue for this * processor. This seems to give the best I/O throughput. */ - if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) - cp->reply_queue = smp_processor_id() % h->nreply_queues; - else - cp->reply_queue = reply_queue % h->nreply_queues; + cp->reply_queue = reply_queue; /* * Set the bits in the address sent down to include: * - performant mode bit not used in ioaccel mode 2 @@ -1157,6 +1144,8 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h, { dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); + + reply_queue = h->reply_map[raw_smp_processor_id()]; switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -7376,6 +7365,26 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } +static void hpsa_setup_reply_map(struct ctlr_info *h) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < h->msix_vectors; queue++) { + mask = pci_irq_get_affinity(h->pdev, queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + h->reply_map[cpu] = queue; + } + return; + +fallback: + for_each_possible_cpu(cpu) + h->reply_map[cpu] = 0; +} + /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7771,6 +7780,10 @@ static int hpsa_pci_init(struct ctlr_info *h) err = hpsa_interrupt_mode(h); if (err) goto clean1; + + /* setup mapping between CPU and reply queue */ + hpsa_setup_reply_map(h); + err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8480,6 +8493,28 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, return wq; } +static void hpda_free_ctlr_info(struct ctlr_info *h) +{ + kfree(h->reply_map); + kfree(h); +} + +static struct ctlr_info *hpda_alloc_ctlr_info(void) +{ + struct ctlr_info *h; + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return NULL; + + h->reply_map = kzalloc(sizeof(*h->reply_map) * nr_cpu_ids, GFP_KERNEL); + if (!h->reply_map) { + kfree(h); + return NULL; + } + return h; +} + static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int dac, rc; @@ -8517,7 +8552,7 @@ reinit_after_soft_reset: * the driver. See comments in hpsa.h for more info. */ BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT); - h = kzalloc(sizeof(*h), GFP_KERNEL); + h = hpda_alloc_ctlr_info(); if (!h) { dev_err(&pdev->dev, "Failed to allocate controller head\n"); return -ENOMEM; @@ -8916,7 +8951,7 @@ static void hpsa_remove_one(struct pci_dev *pdev) h->lockup_detected = NULL; /* init_one 2 */ /* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */ - kfree(h); /* init_one 1 */ + hpda_free_ctlr_info(h); /* init_one 1 */ } static int hpsa_suspend(__attribute__((unused)) struct pci_dev *pdev, diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 018f980a701c..fb9f5e7f8209 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -158,6 +158,7 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { + unsigned int *reply_map; int ctlr; char devname[8]; char *product_name; -- cgit v1.2.3 From adbe552349f2d1e48357a00e564d26135e586634 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:40 +0800 Subject: scsi: megaraid_sas: fix selection of reply queue Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") we could end up with an MSI-X vector that did not have any online CPUs mapped. This would lead to I/O hangs since there was no CPU to receive the completion. Retrieve IRQ affinity information using pci_irq_get_affinity() and use this mapping to choose a reply queue. [mkp: tweaked commit desc] Cc: Hannes Reinecke Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Laurence Oberman Cc: Mike Snitzer Cc: Meelis Roos Cc: Artem Bityutskiy Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Acked-by: Kashyap Desai Tested-by: Kashyap Desai Reviewed-by: Christoph Hellwig Tested-by: Artem Bityutskiy Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 1 + drivers/scsi/megaraid/megaraid_sas_base.c | 39 ++++++++++++++++++++++++++--- drivers/scsi/megaraid/megaraid_sas_fusion.c | 12 +++------ 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index ba6503f37756..27fab8235ea5 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2128,6 +2128,7 @@ enum MR_PD_TYPE { struct megasas_instance { + unsigned int *reply_map; __le32 *producer; dma_addr_t producer_h; __le32 *consumer; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 0f1d88f272be..7b2bed5ab04c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5165,6 +5165,26 @@ skip_alloc: instance->use_seqnum_jbod_fp = false; } +static void megasas_setup_reply_map(struct megasas_instance *instance) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < instance->msix_vectors; queue++) { + mask = pci_irq_get_affinity(instance->pdev, queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + instance->reply_map[cpu] = queue; + } + return; + +fallback: + for_each_possible_cpu(cpu) + instance->reply_map[cpu] = cpu % instance->msix_vectors; +} + /** * megasas_init_fw - Initializes the FW * @instance: Adapter soft state @@ -5343,6 +5363,8 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_setup_irqs; } + megasas_setup_reply_map(instance); + dev_info(&instance->pdev->dev, "firmware supports msix\t: (%d)", fw_msix_count); dev_info(&instance->pdev->dev, @@ -6123,20 +6145,29 @@ static inline int megasas_alloc_mfi_ctrl_mem(struct megasas_instance *instance) */ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) { + instance->reply_map = kzalloc(sizeof(unsigned int) * nr_cpu_ids, + GFP_KERNEL); + if (!instance->reply_map) + return -ENOMEM; + switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) - return -ENOMEM; + goto fail; break; case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) - return -ENOMEM; + goto fail; break; } return 0; + fail: + kfree(instance->reply_map); + instance->reply_map = NULL; + return -ENOMEM; } /* @@ -6148,6 +6179,7 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) */ static inline void megasas_free_ctrl_mem(struct megasas_instance *instance) { + kfree(instance->reply_map); if (instance->adapter_type == MFI_SERIES) { if (instance->producer) pci_free_consistent(instance->pdev, sizeof(u32), @@ -6540,7 +6572,6 @@ fail_io_attach: pci_free_irq_vectors(instance->pdev); fail_init_mfi: scsi_host_put(host); - fail_alloc_instance: pci_disable_device(pdev); @@ -6746,6 +6777,8 @@ megasas_resume(struct pci_dev *pdev) if (rval < 0) goto fail_reenable_msix; + megasas_setup_reply_map(instance); + if (instance->adapter_type != MFI_SERIES) { megasas_reset_reply_desc(instance); if (megasas_ioc_init_fusion(instance)) { diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index dc8e850fbfd2..5ec3b74e8aed 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2641,11 +2641,8 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = (io_info.fpOkForIo > 0) ? true : false; } - /* Use raw_smp_processor_id() for now until cmd->request->cpu is CPU - id by default, not CPU group id, otherwise all MSI-X queues won't - be utilized */ - cmd->request_desc->SCSIIO.MSIxIndex = instance->msix_vectors ? - raw_smp_processor_id() % instance->msix_vectors : 0; + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; praid_context = &io_request->RaidContext; @@ -2971,10 +2968,9 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, } cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle; - cmd->request_desc->SCSIIO.MSIxIndex = - instance->msix_vectors ? - (raw_smp_processor_id() % instance->msix_vectors) : 0; + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; if (!fp_possible) { /* system pd firmware path */ -- cgit v1.2.3 From 2f31115e940c4afd49b99c33123534e2ac924ffb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:41 +0800 Subject: scsi: core: introduce force_blk_mq This patch introduces 'force_blk_mq' to the scsi_host_template so that drivers that have no desire to support the legacy I/O path can signal blk-mq only support. [mkp: commit desc] Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Mike Snitzer Cc: Laurence Oberman Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 1 + include/scsi/scsi_host.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index dd9464920456..ef22b275d050 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -474,6 +474,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->dma_boundary = 0xffffffff; shost->use_blk_mq = scsi_use_blk_mq; + shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq; device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a8b7bf879ced..9c1e4bad6581 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -452,6 +452,9 @@ struct scsi_host_template { /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; + /* True if the low-level driver supports blk-mq only */ + unsigned force_blk_mq:1; + /* * Countdown for host blocking with no commands outstanding. */ -- cgit v1.2.3 From b5b6e8c8d3b4cbeb447a0f10c7d5de3caa573299 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:42 +0800 Subject: scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity Since commit 84676c1f21e8ff5 ("genirq/affinity: assign vectors to all possible CPUs") it is possible to end up in a scenario where only offline CPUs are mapped to an interrupt vector. This is only an issue for the legacy I/O path since with blk-mq/scsi-mq an I/O can't be submitted to a hardware queue if the queue isn't mapped to an online CPU. Fix this issue by forcing virtio-scsi to use blk-mq. [mkp: commit desc] Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Paolo Bonzini Cc: Mike Snitzer Cc: Laurence Oberman Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs") Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Acked-by: Paolo Bonzini Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 59 +++------------------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 7c28e8d4955a..54e3a0f6844c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -91,9 +91,6 @@ struct virtio_scsi_vq { struct virtio_scsi_target_state { seqcount_t tgt_seq; - /* Count of outstanding requests. */ - atomic_t reqs; - /* Currently active virtqueue for requests sent to this target. */ struct virtio_scsi_vq *req_vq; }; @@ -152,8 +149,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) struct virtio_scsi_cmd *cmd = buf; struct scsi_cmnd *sc = cmd->sc; struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd; - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; dev_dbg(&sc->device->sdev_gendev, "cmd %p response %u status %#02x sense_len %u\n", @@ -210,8 +205,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) } sc->scsi_done(sc); - - atomic_dec(&tgt->reqs); } static void virtscsi_vq_done(struct virtio_scsi *vscsi, @@ -580,10 +573,7 @@ static int virtscsi_queuecommand_single(struct Scsi_Host *sh, struct scsi_cmnd *sc) { struct virtio_scsi *vscsi = shost_priv(sh); - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; - atomic_inc(&tgt->reqs); return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc); } @@ -596,55 +586,11 @@ static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi, return &vscsi->req_vqs[hwq]; } -static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi, - struct virtio_scsi_target_state *tgt) -{ - struct virtio_scsi_vq *vq; - unsigned long flags; - u32 queue_num; - - local_irq_save(flags); - if (atomic_inc_return(&tgt->reqs) > 1) { - unsigned long seq; - - do { - seq = read_seqcount_begin(&tgt->tgt_seq); - vq = tgt->req_vq; - } while (read_seqcount_retry(&tgt->tgt_seq, seq)); - } else { - /* no writes can be concurrent because of atomic_t */ - write_seqcount_begin(&tgt->tgt_seq); - - /* keep previous req_vq if a reader just arrived */ - if (unlikely(atomic_read(&tgt->reqs) > 1)) { - vq = tgt->req_vq; - goto unlock; - } - - queue_num = smp_processor_id(); - while (unlikely(queue_num >= vscsi->num_queues)) - queue_num -= vscsi->num_queues; - tgt->req_vq = vq = &vscsi->req_vqs[queue_num]; - unlock: - write_seqcount_end(&tgt->tgt_seq); - } - local_irq_restore(flags); - - return vq; -} - static int virtscsi_queuecommand_multi(struct Scsi_Host *sh, struct scsi_cmnd *sc) { struct virtio_scsi *vscsi = shost_priv(sh); - struct virtio_scsi_target_state *tgt = - scsi_target(sc->device)->hostdata; - struct virtio_scsi_vq *req_vq; - - if (shost_use_blk_mq(sh)) - req_vq = virtscsi_pick_vq_mq(vscsi, sc); - else - req_vq = virtscsi_pick_vq(vscsi, tgt); + struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc); return virtscsi_queuecommand(vscsi, req_vq, sc); } @@ -775,7 +721,6 @@ static int virtscsi_target_alloc(struct scsi_target *starget) return -ENOMEM; seqcount_init(&tgt->tgt_seq); - atomic_set(&tgt->reqs, 0); tgt->req_vq = &vscsi->req_vqs[0]; starget->hostdata = tgt; @@ -823,6 +768,7 @@ static struct scsi_host_template virtscsi_host_template_single = { .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, .track_queue_depth = 1, + .force_blk_mq = 1, }; static struct scsi_host_template virtscsi_host_template_multi = { @@ -844,6 +790,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .target_destroy = virtscsi_target_destroy, .map_queues = virtscsi_map_queues, .track_queue_depth = 1, + .force_blk_mq = 1, }; #define virtscsi_config_get(vdev, fld) \ -- cgit v1.2.3 From c3506df85091ab41cf7716244c460e15136100c4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Mar 2018 17:42:43 +0800 Subject: scsi: virtio_scsi: unify scsi_host_template Now that virtio_scsi uses blk-mq exclusively, we can remove the scsi_host_template and associated plumbing for the legacy I/O path. [mkp: commit desc] Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Paolo Bonzini Cc: Mike Snitzer Cc: Laurence Oberman Cc: Hannes Reinecke Signed-off-by: Ming Lei Suggested-by: Christoph Hellwig , Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 74 ++++++++++------------------------------------ 1 file changed, 15 insertions(+), 59 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 54e3a0f6844c..45d04631888a 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -522,11 +522,20 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev, } #endif -static int virtscsi_queuecommand(struct virtio_scsi *vscsi, - struct virtio_scsi_vq *req_vq, +static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi, + struct scsi_cmnd *sc) +{ + u32 tag = blk_mq_unique_tag(sc->request); + u16 hwq = blk_mq_unique_tag_to_hwq(tag); + + return &vscsi->req_vqs[hwq]; +} + +static int virtscsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) { - struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); + struct virtio_scsi *vscsi = shost_priv(shost); + struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc); struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); unsigned long flags; int req_size; @@ -569,32 +578,6 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, return 0; } -static int virtscsi_queuecommand_single(struct Scsi_Host *sh, - struct scsi_cmnd *sc) -{ - struct virtio_scsi *vscsi = shost_priv(sh); - - return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc); -} - -static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi, - struct scsi_cmnd *sc) -{ - u32 tag = blk_mq_unique_tag(sc->request); - u16 hwq = blk_mq_unique_tag_to_hwq(tag); - - return &vscsi->req_vqs[hwq]; -} - -static int virtscsi_queuecommand_multi(struct Scsi_Host *sh, - struct scsi_cmnd *sc) -{ - struct virtio_scsi *vscsi = shost_priv(sh); - struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc); - - return virtscsi_queuecommand(vscsi, req_vq, sc); -} - static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd) { DECLARE_COMPLETION_ONSTACK(comp); @@ -750,34 +733,13 @@ static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd) return BLK_EH_RESET_TIMER; } -static struct scsi_host_template virtscsi_host_template_single = { - .module = THIS_MODULE, - .name = "Virtio SCSI HBA", - .proc_name = "virtio_scsi", - .this_id = -1, - .cmd_size = sizeof(struct virtio_scsi_cmd), - .queuecommand = virtscsi_queuecommand_single, - .change_queue_depth = virtscsi_change_queue_depth, - .eh_abort_handler = virtscsi_abort, - .eh_device_reset_handler = virtscsi_device_reset, - .eh_timed_out = virtscsi_eh_timed_out, - .slave_alloc = virtscsi_device_alloc, - - .dma_boundary = UINT_MAX, - .use_clustering = ENABLE_CLUSTERING, - .target_alloc = virtscsi_target_alloc, - .target_destroy = virtscsi_target_destroy, - .track_queue_depth = 1, - .force_blk_mq = 1, -}; - -static struct scsi_host_template virtscsi_host_template_multi = { +static struct scsi_host_template virtscsi_host_template = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", .proc_name = "virtio_scsi", .this_id = -1, .cmd_size = sizeof(struct virtio_scsi_cmd), - .queuecommand = virtscsi_queuecommand_multi, + .queuecommand = virtscsi_queuecommand, .change_queue_depth = virtscsi_change_queue_depth, .eh_abort_handler = virtscsi_abort, .eh_device_reset_handler = virtscsi_device_reset, @@ -883,7 +845,6 @@ static int virtscsi_probe(struct virtio_device *vdev) u32 sg_elems, num_targets; u32 cmd_per_lun; u32 num_queues; - struct scsi_host_template *hostt; if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", @@ -896,12 +857,7 @@ static int virtscsi_probe(struct virtio_device *vdev) num_targets = virtscsi_config_get(vdev, max_target) + 1; - if (num_queues == 1) - hostt = &virtscsi_host_template_single; - else - hostt = &virtscsi_host_template_multi; - - shost = scsi_host_alloc(hostt, + shost = scsi_host_alloc(&virtscsi_host_template, sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues); if (!shost) return -ENOMEM; -- cgit v1.2.3 From 09dd15e0d9547ca424de4043bcd429bab6f285c8 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 14 Mar 2018 17:13:39 -0500 Subject: scsi: ibmvfc: Avoid unnecessary port relogin Following an RSCN, ibmvfc will issue an ADISC to determine if the underlying target has changed, comparing the SCSI ID, WWPN, and WWNN to determine how to handle the rport in discovery. However, the comparison of the WWPN and WWNN was performing a memcmp between a big endian field against a CPU endian field, which resulted in the wrong answer on LE systems. This was observed as unexpected errors getting logged at boot time as targets were getting relogins when not needed. Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index b1b1d3a3b173..daefe8172b04 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3579,11 +3579,9 @@ static void ibmvfc_tgt_implicit_logout(struct ibmvfc_target *tgt) static int ibmvfc_adisc_needs_plogi(struct ibmvfc_passthru_mad *mad, struct ibmvfc_target *tgt) { - if (memcmp(&mad->fc_iu.response[2], &tgt->ids.port_name, - sizeof(tgt->ids.port_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[2]) != tgt->ids.port_name) return 1; - if (memcmp(&mad->fc_iu.response[4], &tgt->ids.node_name, - sizeof(tgt->ids.node_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[4]) != tgt->ids.node_name) return 1; if (be32_to_cpu(mad->fc_iu.response[6]) != tgt->scsi_id) return 1; -- cgit v1.2.3 From ef75c685869ea2059f85855a7dc00148a704c36c Mon Sep 17 00:00:00 2001 From: fred gao Date: Thu, 15 Mar 2018 13:21:10 +0800 Subject: drm/i915/gvt: Correct the privilege shadow batch buffer address Once the ring buffer is copied to ring_scan_buffer and scanned, the shadow batch buffer start address is only updated into ring_scan_buffer, not the real ring address allocated through intel_ring_begin in later copy_workload_to_ring_buffer. This patch is only to set the right shadow batch buffer address from Ring buffer, not include the shadow_wa_ctx. v2: - refine some comments. (Zhenyu) v3: - fix typo in title. (Zhenyu) v4: - remove the unnecessary comments. (Zhenyu) - add comments in bb_start_cmd_va update. (Zhenyu) Fixes: 0a53bc07f044 ("drm/i915/gvt: Separate cmd scan from request allocation") Cc: stable@vger.kernel.org # v4.15 Cc: Zhenyu Wang Cc: Yulei Zhang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 8 ++++++++ drivers/gpu/drm/i915/gvt/scheduler.c | 11 +++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 1 + 3 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c8454ac43fae..db6b94dda5df 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -471,6 +471,7 @@ struct parser_exec_state { * used when ret from 2nd level batch buffer */ int saved_buf_addr_type; + bool is_ctx_wa; struct cmd_info *info; @@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->accessing = true; bb->bb_start_cmd_va = s->ip_va; + if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa)) + bb->bb_offset = s->ip_va - s->rb_va; + else + bb->bb_offset = 0; + /* * ip_va saves the virtual address of the shadow batch buffer, while * ip_gma saves the graphics address of the original batch buffer. @@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload) s.ring_tail = gma_tail; s.rb_va = workload->shadow_ring_buffer_va; s.workload = workload; + s.is_ctx_wa = false; if ((bypass_scan_mask & (1 << workload->ring_id)) || gma_head == gma_tail) @@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.ring_tail = gma_tail; s.rb_va = wa_ctx->indirect_ctx.shadow_va; s.workload = workload; + s.is_ctx_wa = true; if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 8caf72c1e794..fdf1c0bf0d55 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -426,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) goto err; } + /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va + * is only updated into ring_scan_buffer, not real ring address + * allocated in later copy_workload_to_ring_buffer. pls be noted + * shadow_ring_buffer_va is now pointed to real ring buffer va + * in copy_workload_to_ring_buffer. + */ + + if (bb->bb_offset) + bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + + bb->bb_offset; + /* relocate shadow batch buffer */ bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); if (gmadr_bytes == 8) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2603336b7c6d..a79a4f60637e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -124,6 +124,7 @@ struct intel_vgpu_shadow_bb { u32 *bb_start_cmd_va; unsigned int clflush; bool accessing; + unsigned long bb_offset; }; #define workload_q_head(vgpu, ring_id) \ -- cgit v1.2.3 From 850555d1d31e45fc3e9a2982f81717387e8d5e1b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 14 Feb 2018 11:35:01 +0800 Subject: drm/i915/gvt: fix user copy warning by whitelist workload rb_tail field This is to fix warning got as: [ 6730.476938] ------------[ cut here ]------------ [ 6730.476979] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLAB object 'gvt-g_vgpu_workload' (offset 120, size 4)! [ 6730.477021] WARNING: CPU: 2 PID: 441 at mm/usercopy.c:81 usercopy_warn+0x7e/0xa0 [ 6730.477042] Modules linked in: tun(E) bridge(E) stp(E) llc(E) kvmgt(E) x86_pkg_temp_thermal(E) vfio_mdev(E) intel_powerclamp(E) mdev(E) coretemp(E) vfio_iommu_type1(E) vfio(E) kvm_intel(E) kvm(E) hid_generic(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) usbhid(E) i915(E) crc32c_intel(E) hid(E) ghash_clmulni_intel(E) pcbc(E) aesni_intel(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) intel_cstate(E) idma64(E) evdev(E) virt_dma(E) iTCO_wdt(E) intel_uncore(E) intel_rapl_perf(E) intel_lpss_pci(E) sg(E) shpchp(E) mei_me(E) pcspkr(E) iTCO_vendor_support(E) intel_lpss(E) intel_pch_thermal(E) prime_numbers(E) mei(E) mfd_core(E) video(E) acpi_pad(E) button(E) binfmt_misc(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) sd_mod(E) e1000e(E) xhci_pci(E) sdhci_pci(E) [ 6730.477244] ptp(E) cqhci(E) xhci_hcd(E) pps_core(E) sdhci(E) mmc_core(E) i2c_i801(E) usbcore(E) thermal(E) fan(E) [ 6730.477276] CPU: 2 PID: 441 Comm: gvt workload 0 Tainted: G E 4.16.0-rc1-gvt-staging-0213+ #127 [ 6730.477303] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0039.2016.0316.1747 03/16/2016 [ 6730.477326] RIP: 0010:usercopy_warn+0x7e/0xa0 [ 6730.477340] RSP: 0018:ffffba6301223d18 EFLAGS: 00010286 [ 6730.477355] RAX: 0000000000000000 RBX: ffff8f41caae9838 RCX: 0000000000000006 [ 6730.477375] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff8f41dad166f0 [ 6730.477395] RBP: 0000000000000004 R08: 0000000000000576 R09: 0000000000000000 [ 6730.477415] R10: ffffffffb1293fb2 R11: 00000000ffffffff R12: 0000000000000001 [ 6730.477447] R13: ffff8f41caae983c R14: ffff8f41caae9838 R15: 00007f183ca2b000 [ 6730.477467] FS: 0000000000000000(0000) GS:ffff8f41dad00000(0000) knlGS:0000000000000000 [ 6730.477489] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6730.477506] CR2: 0000559462817291 CR3: 000000028b46c006 CR4: 00000000003626e0 [ 6730.477526] Call Trace: [ 6730.477537] __check_object_size+0x9c/0x1a0 [ 6730.477562] __kvm_write_guest_page+0x45/0x90 [kvm] [ 6730.477585] kvm_write_guest+0x46/0x80 [kvm] [ 6730.477599] kvmgt_rw_gpa+0x9b/0xf0 [kvmgt] [ 6730.477642] workload_thread+0xa38/0x1040 [i915] [ 6730.477659] ? do_wait_intr_irq+0xc0/0xc0 [ 6730.477673] ? finish_wait+0x80/0x80 [ 6730.477707] ? clean_workloads+0x120/0x120 [i915] [ 6730.477722] kthread+0x111/0x130 [ 6730.477733] ? _kthread_create_worker_on_cpu+0x60/0x60 [ 6730.477750] ? exit_to_usermode_loop+0x6f/0xb0 [ 6730.477766] ret_from_fork+0x35/0x40 [ 6730.477777] Code: 48 c7 c0 20 e3 25 b1 48 0f 44 c2 41 50 51 41 51 48 89 f9 49 89 f1 4d 89 d8 4c 89 d2 48 89 c6 48 c7 c7 78 e3 25 b1 e8 b2 bc e4 ff <0f> ff 48 83 c4 18 c3 48 c7 c6 09 d0 26 b1 49 89 f1 49 89 f3 eb [ 6730.477849] ---[ end trace cae869c1c323e45a ]--- By whitelist guest page write from workload struct allocated from kmem cache. Reviewed-by: Hang Yuan Signed-off-by: Zhenyu Wang (cherry picked from commit 5627705406874df57fdfad3b4e0c9aedd3b007df) --- drivers/gpu/drm/i915/gvt/scheduler.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index fdf1c0bf0d55..d74d6f05c62c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1105,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); - s->workloads = kmem_cache_create("gvt-g_vgpu_workload", - sizeof(struct intel_vgpu_workload), 0, - SLAB_HWCACHE_ALIGN, - NULL); + s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", + sizeof(struct intel_vgpu_workload), 0, + SLAB_HWCACHE_ALIGN, + offsetof(struct intel_vgpu_workload, rb_tail), + sizeof_field(struct intel_vgpu_workload, rb_tail), + NULL); if (!s->workloads) { ret = -ENOMEM; -- cgit v1.2.3 From e74ef2194b41ba5e511fab29fe5ff00e72d2f42a Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Thu, 8 Mar 2018 15:08:11 +0100 Subject: mmc: block: fix updating ext_csd caches on ioctl call PARTITION_CONFIG is cached in mmc_card->ext_csd.part_config and the currently active partition in mmc_blk_data->part_curr. These caches do not always reflect changes if the ioctl call modifies the PARTITION_CONFIG registers, e.g. by changing BOOT_PARTITION_ENABLE. Write the PARTITION_CONFIG value extracted from the ioctl call to the cache and update the currently active partition accordingly. This ensures that the user space cannot change the values behind the kernel's back. The next call to mmc_blk_part_switch() will operate on the data set by the ioctl and reflect the changes appropriately. Signed-off-by: Bastian Stender Signed-off-by: Jan Luebbe Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 20135a5de748..2cfb963d9f37 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -72,6 +72,7 @@ MODULE_ALIAS("mmc:block"); #define MMC_BLK_TIMEOUT_MS (10 * 1000) #define MMC_SANITIZE_REQ_TIMEOUT 240000 #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16) +#define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8) #define mmc_req_rel_wr(req) ((req->cmd_flags & REQ_FUA) && \ (rq_data_dir(req) == WRITE)) @@ -586,6 +587,24 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return data.error; } + /* + * Make sure the cache of the PARTITION_CONFIG register and + * PARTITION_ACCESS bits is updated in case the ioctl ext_csd write + * changed it successfully. + */ + if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_PART_CONFIG) && + (cmd.opcode == MMC_SWITCH)) { + struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev); + u8 value = MMC_EXTRACT_VALUE_FROM_ARG(cmd.arg); + + /* + * Update cache so the next mmc_blk_part_switch call operates + * on up-to-date data. + */ + card->ext_csd.part_config = value; + main_md->part_curr = value & EXT_CSD_PART_CONFIG_ACC_MASK; + } + /* * According to the SD specs, some commands require a delay after * issuing the command. -- cgit v1.2.3 From e22842dd64bf86753d3f2b6ea474d73fc1e6ca24 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 9 Mar 2018 15:10:21 +0900 Subject: mmc: dw_mmc: exynos: fix the suspend/resume issue for exynos5433 Before enabling the clock, dwmmc exynos driver is trying to access the register. Then the kernel panic can be occurred. Signed-off-by: Jaehoon Chung Reviewed-by: Chanwoo Choi Tested-by: Chanwoo Choi Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-exynos.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index fa41d9422d57..a84aa3f1ae85 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -165,9 +165,15 @@ static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing) static int dw_mci_exynos_runtime_resume(struct device *dev) { struct dw_mci *host = dev_get_drvdata(dev); + int ret; + + ret = dw_mci_runtime_resume(dev); + if (ret) + return ret; dw_mci_exynos_config_smu(host); - return dw_mci_runtime_resume(dev); + + return ret; } /** -- cgit v1.2.3 From dbe7dc6b9b28f5b012b0bedc372aa0c52521f3e4 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 14 Mar 2018 14:50:09 +0000 Subject: mmc: core: Disable HPI for certain Micron (Numonyx) eMMC cards Certain Micron eMMC v4.5 cards might get broken when HPI feature is used and hence this patch disables the HPI feature for such buggy cards. In U-Boot, these cards are reported as Manufacturer: Micron (ID: 0xFE) OEM: 0x4E Name: MMC32G Revision: 19 (0x13) Serial: 959241022 Manufact. date: 8/2015 (0x82) CRC: 0x00 Tran Speed: 52000000 Rd Block Len: 512 MMC version 4.5 High Capacity: Yes Capacity: 29.1 GiB Boot Partition Size: 16 MiB Bus Width: 8-bit According to JEDEC JEP106 manufacturer 0xFE is Numonyx, which was bought by Micron. Signed-off-by: Dirk Behme Signed-off-by: Mark Craske Cc: # 4.8+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/card.h | 1 + drivers/mmc/core/quirks.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 79a5b985ccf5..9c821eedd156 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -82,6 +82,7 @@ struct mmc_fixup { #define CID_MANFID_APACER 0x27 #define CID_MANFID_KINGSTON 0x70 #define CID_MANFID_HYNIX 0x90 +#define CID_MANFID_NUMONYX 0xFE #define END_FIXUP { NULL } diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 75d317623852..5153577754f0 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -109,6 +109,12 @@ static const struct mmc_fixup mmc_ext_csd_fixups[] = { */ MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_HYNIX, 0x014a, add_quirk, MMC_QUIRK_BROKEN_HPI, 5), + /* + * Certain Micron (Numonyx) eMMC 4.5 cards might get broken when HPI + * feature is used so disable the HPI feature for such buggy cards. + */ + MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_NUMONYX, + 0x014e, add_quirk, MMC_QUIRK_BROKEN_HPI, 6), END_FIXUP }; -- cgit v1.2.3 From c658dc58c7eaa8569ceb0edd1ddbdfda84fe8aa5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 15 Mar 2018 11:22:28 +0200 Subject: mmc: core: Fix tracepoint print of blk_addr and blksz Swap the positions of blk_addr and blksz in the tracepoint print arguments so that they match the print format. Signed-off-by: Adrian Hunter Fixes: d2f82254e4e8 ("mmc: core: Add members to mmc_request and mmc_data for CQE's") Cc: # 4.14+ Signed-off-by: Ulf Hansson --- include/trace/events/mmc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h index 200f731be557..7b706ff21335 100644 --- a/include/trace/events/mmc.h +++ b/include/trace/events/mmc.h @@ -86,8 +86,8 @@ TRACE_EVENT(mmc_request_start, __entry->stop_flags, __entry->stop_retries, __entry->sbc_opcode, __entry->sbc_arg, __entry->sbc_flags, __entry->sbc_retries, - __entry->blocks, __entry->blk_addr, - __entry->blksz, __entry->data_flags, __entry->tag, + __entry->blocks, __entry->blksz, + __entry->blk_addr, __entry->data_flags, __entry->tag, __entry->can_retune, __entry->doing_retune, __entry->retune_now, __entry->need_retune, __entry->hold_retune, __entry->retune_period) -- cgit v1.2.3 From 8a927d648c2ee20f2fd746d733c5cd76d0fbb0c1 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 15 Mar 2018 11:09:35 +0100 Subject: drm/tegra: plane: Fix RGB565 format on older Tegra The opaque/alpha format conversion code is currently only looking at XRGB formats because they have an equivalent ARGB format. The opaque format for RGB565 is RGB565 itself, much like the YUV formats map to themselves. Reported-by: Dmitry Osipenko Fixes: ebae8d07435a ("drm/tegra: dc: Implement legacy blending") Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/plane.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 36a06a993698..eddaa2f05ed6 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -297,6 +297,10 @@ int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha) case WIN_COLOR_DEPTH_B8G8R8X8: *alpha = WIN_COLOR_DEPTH_B8G8R8A8; return 0; + + case WIN_COLOR_DEPTH_B5G6R5: + *alpha = opaque; + return 0; } return -EINVAL; -- cgit v1.2.3 From 3d502067599f0db12e74e6646aee8728efe3e5be Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 13 Mar 2018 10:41:54 +0100 Subject: net/smc: simplify wait when closing listen socket Closing of a listen socket wakes up kernel_accept() of smc_tcp_listen_worker(), and then has to wait till smc_tcp_listen_worker() gives up the internal clcsock. The wait logic introduced with commit 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") might wait longer than necessary. This patch implements the idea to implement the wait just with flush_work(), and gets rid of the extra smc_close_wait_listen_clcsock() function. Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") Reported-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ---- net/smc/smc_close.c | 25 +++---------------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8cc97834d4f6..1e0d780855c3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -978,10 +978,6 @@ out: lsmc->clcsock = NULL; } release_sock(lsk); - /* no more listening, wake up smc_close_wait_listen_clcsock and - * accept - */ - lsk->sk_state_change(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e339c0186dcf..fa41d9881741 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_listen_clcsock(struct smc_sock *smc) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sock *sk = &smc->sk; - signed long timeout; - - timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; - add_wait_queue(sk_sleep(sk), &wait); - do { - release_sock(sk); - if (smc->clcsock) - timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, - timeout); - sched_annotate_sleep(); - lock_sock(sk); - if (!smc->clcsock) - break; - } while (timeout); - remove_wait_queue(sk_sleep(sk), &wait); -} - /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -204,9 +183,11 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); - smc_close_wait_listen_clcsock(smc); } smc_close_cleanup_listen(sk); + release_sock(sk); + flush_work(&smc->tcp_listen_work); + lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); -- cgit v1.2.3 From 48519232bea9230d1c5dbbb680d9257d4861bb4c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Mar 2018 04:00:24 +0300 Subject: drm/tegra: plane: Correct legacy blending Keep old 'dependent' state of unaffected planes, this way new state takes into account current state of unaffected planes. Fixes: ebae8d07435a ("drm/tegra: dc: Implement legacy blending") Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/plane.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index eddaa2f05ed6..94dac79ac3c9 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -334,9 +334,6 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra, unsigned int zpos[2]; unsigned int i; - for (i = 0; i < 3; i++) - state->dependent[i] = false; - for (i = 0; i < 2; i++) zpos[i] = 0; @@ -350,6 +347,8 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra, index = tegra_plane_get_overlap_index(tegra, p); + state->dependent[index] = false; + /* * If any of the other planes is on top of this plane and uses * a format with an alpha component, mark this plane as being -- cgit v1.2.3 From 2681bc79eeb640562c932007bfebbbdc55bf6a7d Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 14 Mar 2018 18:14:04 +0100 Subject: drm/radeon: Don't turn off DP sink when disconnected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turning off the sink in this case causes various issues, because userspace expects it to stay on until it turns it off explicitly. Instead, turn the sink off and back on when a display is connected again. This dance seems necessary for link training to work correctly. Bugzilla: https://bugs.freedesktop.org/105308 Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_connectors.c | 31 ++++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 2e2ca3c6b47d..df9469a8fdb1 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -90,25 +90,18 @@ void radeon_connector_hotplug(struct drm_connector *connector) /* don't do anything if sink is not display port, i.e., * passive dp->(dvi|hdmi) adaptor */ - if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { - int saved_dpms = connector->dpms; - /* Only turn off the display if it's physically disconnected */ - if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } else if (radeon_dp_needs_link_train(radeon_connector)) { - /* Don't try to start link training before we - * have the dpcd */ - if (!radeon_dp_getdpcd(radeon_connector)) - return; - - /* set it to OFF so that drm_helper_connector_dpms() - * won't return immediately since the current state - * is ON at this point. - */ - connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } - connector->dpms = saved_dpms; + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && + radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && + radeon_dp_needs_link_train(radeon_connector)) { + /* Don't start link training before we have the DPCD */ + if (!radeon_dp_getdpcd(radeon_connector)) + return; + + /* Turn the connector off and back on immediately, which + * will trigger link training + */ + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } } } -- cgit v1.2.3 From d1bb88e078c2aec6a8456d7eb28ca572c06e12f3 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 14 Mar 2018 13:41:29 -0400 Subject: drm/amdgpu: Use atomic function to disable crtcs with dc enabled This change fixes the deadlock when unloading the driver with displays connected. Signed-off-by: Mikita Lipski Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index af1b879a9ee9..66cb10cdc7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2063,9 +2063,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; - if (adev->mode_info.mode_config_initialized) - drm_crtc_force_disable_all(adev->ddev); - + if (adev->mode_info.mode_config_initialized){ + if (!amdgpu_device_has_dc_support(adev)) + drm_crtc_force_disable_all(adev->ddev); + else + drm_atomic_helper_shutdown(adev->ddev); + } amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); -- cgit v1.2.3 From 746d024c3211813946b319411aeb2b47767f8fb0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Mar 2018 17:19:57 +0100 Subject: gpu: ipu-v3: prg: avoid possible array underflow gcc-8 reports that we access an array with a negative index in an error case: drivers/gpu/ipu-v3/ipu-prg.c: In function 'ipu_prg_channel_disable': drivers/gpu/ipu-v3/ipu-prg.c:252:43: error: array subscript -22 is below array bounds of 'struct ipu_prg_channel[3]' [-Werror=array-bounds] This moves the range check in front of the first time that variable gets used. Signed-off-by: Arnd Bergmann Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index 97b99500153d..83f9dd934a5d 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -250,10 +250,14 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan) { int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num); struct ipu_prg *prg = ipu_chan->ipu->prg_priv; - struct ipu_prg_channel *chan = &prg->chan[prg_chan]; + struct ipu_prg_channel *chan; u32 val; - if (!chan->enabled || prg_chan < 0) + if (prg_chan < 0) + return; + + chan = &prg->chan[prg_chan]; + if (!chan->enabled) return; pm_runtime_get_sync(prg->dev); @@ -280,13 +284,15 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, { int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num); struct ipu_prg *prg = ipu_chan->ipu->prg_priv; - struct ipu_prg_channel *chan = &prg->chan[prg_chan]; + struct ipu_prg_channel *chan; u32 val; int ret; if (prg_chan < 0) return prg_chan; + chan = &prg->chan[prg_chan]; + if (chan->enabled) { ipu_pre_update(prg->pres[chan->used_pre], *eba); return 0; -- cgit v1.2.3 From 2ead44a5984b30fad7e6da507cdddb97f0401075 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 13 Feb 2018 17:11:34 -0200 Subject: drm/imx: ipuv3-plane: Make functions static when possible ipu_plane_state_reset(), ipu_plane_duplicate_state() and ipu_plane_destroy_state() are only used in this file, so make them static. This fixes the following sparse warnings: drivers/gpu/drm/imx/ipuv3-plane.c:275:6: warning: symbol 'ipu_plane_state_reset' was not declared. Should it be static? drivers/gpu/drm/imx/ipuv3-plane.c:295:24: warning: symbol 'ipu_plane_duplicate_state' was not declared. Should it be static? drivers/gpu/drm/imx/ipuv3-plane.c:309:6: warning: symbol 'ipu_plane_destroy_state' was not declared. Should it be static? Signed-off-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 57ed56d8623f..d9f0a7684ec8 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -272,7 +272,7 @@ static void ipu_plane_destroy(struct drm_plane *plane) kfree(ipu_plane); } -void ipu_plane_state_reset(struct drm_plane *plane) +static void ipu_plane_state_reset(struct drm_plane *plane) { struct ipu_plane_state *ipu_state; @@ -292,7 +292,8 @@ void ipu_plane_state_reset(struct drm_plane *plane) plane->state = &ipu_state->base; } -struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane) +static struct drm_plane_state * +ipu_plane_duplicate_state(struct drm_plane *plane) { struct ipu_plane_state *state; @@ -306,8 +307,8 @@ struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane) return &state->base; } -void ipu_plane_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) +static void ipu_plane_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) { struct ipu_plane_state *ipu_state = to_ipu_plane_state(state); -- cgit v1.2.3 From a71d3241db6b9430e613cff168611bad97297ba2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 13 Feb 2018 17:11:35 -0200 Subject: drm/imx: ipuv3-plane: Include "imx-drm.h" header file ipu_planes_assign_pre() prototype is in "imx-drm.h" header file, so include it to fix the following sparse warning: drivers/gpu/drm/imx/ipuv3-plane.c:729:5: warning: symbol 'ipu_planes_assign_pre' was not declared. Should it be static? Signed-off-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index d9f0a7684ec8..d9113faaa62f 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -22,6 +22,7 @@ #include #include "video/imx-ipu-v3.h" +#include "imx-drm.h" #include "ipuv3-plane.h" struct ipu_plane_state { -- cgit v1.2.3 From 6a055b92de15af987b4027826d43aa103c65a3c4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 15 Mar 2018 10:11:59 +0100 Subject: drm/imx: move arming of the vblank event to atomic_flush Right now the vblank event completion is racing with the atomic update, which is especially bad when the PRE is in use, as one of the hardware issue workaround might extend the atomic commit for quite some time. If the vblank IRQ happens to trigger during that time, we will prematurely signal the atomic commit completion to userspace, which causes tearing when userspace re-uses a framebuffer we haven't managed to flip away from yet. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 9a9961802f5c..e83af0f2be86 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -225,7 +225,11 @@ static void ipu_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { drm_crtc_vblank_on(crtc); +} +static void ipu_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc)); @@ -293,6 +297,7 @@ static const struct drm_crtc_helper_funcs ipu_helper_funcs = { .mode_set_nofb = ipu_crtc_mode_set_nofb, .atomic_check = ipu_crtc_atomic_check, .atomic_begin = ipu_crtc_atomic_begin, + .atomic_flush = ipu_crtc_atomic_flush, .atomic_disable = ipu_crtc_atomic_disable, .atomic_enable = ipu_crtc_atomic_enable, }; -- cgit v1.2.3 From d90c76bb61128ed9022b9418c31c4749764b6cd9 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 8 Mar 2018 14:57:19 -0600 Subject: clk: aspeed: Fix is_enabled for certain clocks Some of the Aspeed clocks are disabled by setting the relevant bit in the "clock stop control" register to one, while others are disabled by setting their bit to zero. The driver already uses a flag per gate to identify this behavior, but doesn't apply it in the clock is_enabled function. Use the existing gate flag to correctly return whether or not a clock is enabled in the aspeed_clk_is_enabled function. Signed-off-by: Eddie James Fixes: 6671507f0fbd ("clk: aspeed: Handle inverse polarity of USB port 1 clock gate") Reviewed-by: Joel Stanley Signed-off-by: Stephen Boyd --- drivers/clk/clk-aspeed.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c index 9f7f931d6b2f..168777175cd1 100644 --- a/drivers/clk/clk-aspeed.c +++ b/drivers/clk/clk-aspeed.c @@ -259,11 +259,12 @@ static int aspeed_clk_is_enabled(struct clk_hw *hw) { struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw); u32 clk = BIT(gate->clock_idx); + u32 enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk; u32 reg; regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, ®); - return (reg & clk) ? 0 : 1; + return ((reg & clk) == enval) ? 1 : 0; } static const struct clk_ops aspeed_clk_gate_ops = { -- cgit v1.2.3 From 8a53fc511c5ec81347b981b438f68c3dde421608 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 8 Mar 2018 14:57:20 -0600 Subject: clk: aspeed: Prevent reset if clock is enabled According to the Aspeed specification, the reset and enable sequence should be done when the clock is stopped. The specification doesn't define behavior if the reset is done while the clock is enabled. From testing on the AST2500, the LPC Controller has problems if the clock is reset while enabled. Therefore, check whether the clock is enabled or not before performing the reset and enable sequence in the Aspeed clock driver. Reported-by: Lei Yu Signed-off-by: Eddie James Fixes: 15ed8ce5f84e ("clk: aspeed: Register gated clocks") Reviewed-by: Joel Stanley Signed-off-by: Stephen Boyd --- drivers/clk/clk-aspeed.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c index 168777175cd1..5eb50c31e455 100644 --- a/drivers/clk/clk-aspeed.c +++ b/drivers/clk/clk-aspeed.c @@ -205,6 +205,18 @@ static const struct aspeed_clk_soc_data ast2400_data = { .calc_pll = aspeed_ast2400_calc_pll, }; +static int aspeed_clk_is_enabled(struct clk_hw *hw) +{ + struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw); + u32 clk = BIT(gate->clock_idx); + u32 enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk; + u32 reg; + + regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, ®); + + return ((reg & clk) == enval) ? 1 : 0; +} + static int aspeed_clk_enable(struct clk_hw *hw) { struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw); @@ -215,6 +227,11 @@ static int aspeed_clk_enable(struct clk_hw *hw) spin_lock_irqsave(gate->lock, flags); + if (aspeed_clk_is_enabled(hw)) { + spin_unlock_irqrestore(gate->lock, flags); + return 0; + } + if (gate->reset_idx >= 0) { /* Put IP in reset */ regmap_update_bits(gate->map, ASPEED_RESET_CTRL, rst, rst); @@ -255,18 +272,6 @@ static void aspeed_clk_disable(struct clk_hw *hw) spin_unlock_irqrestore(gate->lock, flags); } -static int aspeed_clk_is_enabled(struct clk_hw *hw) -{ - struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw); - u32 clk = BIT(gate->clock_idx); - u32 enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk; - u32 reg; - - regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, ®); - - return ((reg & clk) == enval) ? 1 : 0; -} - static const struct clk_ops aspeed_clk_gate_ops = { .enable = aspeed_clk_enable, .disable = aspeed_clk_disable, -- cgit v1.2.3 From f44cb4b19ed40b655c2d422c9021ab2c2625adb6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Mar 2018 17:02:34 +0100 Subject: Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174 The Atheros 1525/QCA6174 BT doesn't seem working properly on the recent kernels, as it tries to load a wrong firmware ar3k/AthrBT_0x00000200.dfu and it fails. This seems to have been a problem for some time, and the known workaround is to apply BTUSB_QCA_ROM quirk instead of BTUSB_ATH3012. The device in question is: T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3004 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1082504 Reported-by: Ivan Levshin Tested-by: Ivan Levshin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 382be00a8329..366a49c7c08f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -231,7 +231,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, @@ -264,6 +263,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, -- cgit v1.2.3 From b09c61522c81886c34966825f9e5afcbfafac446 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Mar 2018 23:06:02 +0100 Subject: Revert "Bluetooth: hci_bcm: Streamline runtime PM code" This reverts commit 43fff7683468 ("Bluetooth: hci_bcm: Streamline runtime PM code"). The commit msg for this commit states "No functional change intended.", but replacing: pm_runtime_get(); pm_runtime_mark_last_busy(); pm_runtime_put_autosuspend(); with: pm_request_resume(); Does result in a functional change, pm_request_resume() only calls pm_runtime_mark_last_busy() if the device was suspended before the call. This results in the following happening: 1) Device is runtime suspended 2) Device drives host_wake IRQ logically high as it starts receiving data 3) bcm_host_wake() gets called, causes the device to runtime-resume, current time gets marked as last_busy time 4) After 5 seconds the autosuspend timer expires and the dev autosuspends as no one has been calling pm_runtime_mark_last_busy(), the device was resumed during those 5 seconds, so all the pm_request_resume() calls while receiving data and/or bcm_host_wake() calls were nops 5) If 4) happens while the device has (just received) data in its buffer to be read by the host the IRQ line is *already* / still logically high when we autosuspend and since we use an edge triggered IRQ, the IRQ will never trigger, causing the device to get stuck in suspend Therefor this commit has to be reverted, so that we avoid the device getting stuck in suspend. Signed-off-by: Hans de Goede Acked-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 6314dfb02969..af5658121601 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -244,7 +244,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data) bt_dev_dbg(bdev, "Host wake IRQ"); - pm_request_resume(bdev->dev); + pm_runtime_get(bdev->dev); + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); return IRQ_HANDLED; } @@ -586,8 +588,11 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) } else if (!bcm->rx_skb) { /* Delay auto-suspend when receiving completed packet */ mutex_lock(&bcm_device_lock); - if (bcm->dev && bcm_device_exists(bcm->dev)) - pm_request_resume(bcm->dev->dev); + if (bcm->dev && bcm_device_exists(bcm->dev)) { + pm_runtime_get(bcm->dev->dev); + pm_runtime_mark_last_busy(bcm->dev->dev); + pm_runtime_put_autosuspend(bcm->dev->dev); + } mutex_unlock(&bcm_device_lock); } -- cgit v1.2.3 From e07c99b07ae85255d5c5bc2480fbd4c4e77f71bc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Mar 2018 23:06:03 +0100 Subject: Bluetooth: hci_bcm: Set pulsed_host_wake flag in sleep parameters The IRQ output of the bcm bt-device is really a level IRQ signal, which signals a logical high as long as the device's buffer contains data. Since the draining in the buffer is done in the tty driver, we cannot (easily) wait in a threaded interrupt handler for the draining, after which the IRQ should go low again. So instead we treat the IRQ as an edge interrupt. This opens the window for a theoretical race where we wakeup, read some data and then autosuspend *before* the IRQ has gone (logical) low, followed by the device just at that moment receiving more data, causing the IRQ to stay high and we never see an edge. Since we call pm_runtime_mark_last_busy() on every received byte, there should be plenty time for the IRQ to go (logical) low before we ever suspend, so this should never happen, but after commit 43fff7683468 ("Bluetooth: hci_bcm: Streamline runtime PM code"), which has been reverted since, this was actually happening causing the device to get stuck in runtime suspend. The bcm bt-device actually has a workaround for this, if we set the pulsed_host_wake flag in the sleep parameters, then the device monitors if the host is draining the buffer and if not then after a timeout the device will pulse the IRQ line, causing us to see an edge, fixing the stuck in suspend condition. This commit sets the pulsed_host_wake flag to fix the (mostly theoretical) race caused by us treating the IRQ as an edge IRQ. Signed-off-by: Hans de Goede Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index af5658121601..40b9fb247010 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -303,7 +303,7 @@ static const struct bcm_set_sleep_mode default_sleep_params = { .usb_auto_sleep = 0, .usb_resume_timeout = 0, .break_to_host = 0, - .pulsed_host_wake = 0, + .pulsed_host_wake = 1, }; static int bcm_setup_sleep(struct hci_uart *hu) -- cgit v1.2.3 From 51d4740f88affd85d49c04e3c9cd129c0e33bcb9 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Mon, 12 Mar 2018 16:20:58 -0400 Subject: net sched actions: return explicit error when tunnel_key mode is not specified If set/unset mode of the tunnel_key action is not provided, ->init() still returns 0, and the caller proceeds with bogus 'struct tc_action *' object, this results in crash: % tc actions add action tunnel_key src_ip 1.1.1.1 dst_ip 2.2.2.1 id 7 index 1 [ 35.805515] general protection fault: 0000 [#1] SMP PTI [ 35.806161] Modules linked in: act_tunnel_key kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd serio_raw [ 35.808233] CPU: 1 PID: 428 Comm: tc Not tainted 4.16.0-rc4+ #286 [ 35.808929] RIP: 0010:tcf_action_init+0x90/0x190 [ 35.809457] RSP: 0018:ffffb8edc068b9a0 EFLAGS: 00010206 [ 35.810053] RAX: 1320c000000a0003 RBX: 0000000000000001 RCX: 0000000000000000 [ 35.810866] RDX: 0000000000000070 RSI: 0000000000007965 RDI: ffffb8edc068b910 [ 35.811660] RBP: ffffb8edc068b9d0 R08: 0000000000000000 R09: ffffb8edc068b808 [ 35.812463] R10: ffffffffc02bf040 R11: 0000000000000040 R12: ffffb8edc068bb38 [ 35.813235] R13: 0000000000000000 R14: 0000000000000000 R15: ffffb8edc068b910 [ 35.814006] FS: 00007f3d0d8556c0(0000) GS:ffff91d1dbc40000(0000) knlGS:0000000000000000 [ 35.814881] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.815540] CR2: 000000000043f720 CR3: 0000000019248001 CR4: 00000000001606a0 [ 35.816457] Call Trace: [ 35.817158] tc_ctl_action+0x11a/0x220 [ 35.817795] rtnetlink_rcv_msg+0x23d/0x2e0 [ 35.818457] ? __slab_alloc+0x1c/0x30 [ 35.819079] ? __kmalloc_node_track_caller+0xb1/0x2b0 [ 35.819544] ? rtnl_calcit.isra.30+0xe0/0xe0 [ 35.820231] netlink_rcv_skb+0xce/0x100 [ 35.820744] netlink_unicast+0x164/0x220 [ 35.821500] netlink_sendmsg+0x293/0x370 [ 35.822040] sock_sendmsg+0x30/0x40 [ 35.822508] ___sys_sendmsg+0x2c5/0x2e0 [ 35.823149] ? pagecache_get_page+0x27/0x220 [ 35.823714] ? filemap_fault+0xa2/0x640 [ 35.824423] ? page_add_file_rmap+0x108/0x200 [ 35.825065] ? alloc_set_pte+0x2aa/0x530 [ 35.825585] ? finish_fault+0x4e/0x70 [ 35.826140] ? __handle_mm_fault+0xbc1/0x10d0 [ 35.826723] ? __sys_sendmsg+0x41/0x70 [ 35.827230] __sys_sendmsg+0x41/0x70 [ 35.827710] do_syscall_64+0x68/0x120 [ 35.828195] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 35.828859] RIP: 0033:0x7f3d0ca4da67 [ 35.829331] RSP: 002b:00007ffc9f284338 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 35.830304] RAX: ffffffffffffffda RBX: 00007ffc9f284460 RCX: 00007f3d0ca4da67 [ 35.831247] RDX: 0000000000000000 RSI: 00007ffc9f2843b0 RDI: 0000000000000003 [ 35.832167] RBP: 000000005aa6a7a9 R08: 0000000000000001 R09: 0000000000000000 [ 35.833075] R10: 00000000000005f1 R11: 0000000000000246 R12: 0000000000000000 [ 35.833997] R13: 00007ffc9f2884c0 R14: 0000000000000001 R15: 0000000000674640 [ 35.834923] Code: 24 30 bb 01 00 00 00 45 31 f6 eb 5e 8b 50 08 83 c2 07 83 e2 fc 83 c2 70 49 8b 07 48 8b 40 70 48 85 c0 74 10 48 89 14 24 4c 89 ff d0 48 8b 14 24 48 01 c2 49 01 d6 45 85 ed 74 05 41 83 47 2c [ 35.837442] RIP: tcf_action_init+0x90/0x190 RSP: ffffb8edc068b9a0 [ 35.838291] ---[ end trace a095c06ee4b97a26 ]--- Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Roman Mashak Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0e23aac09ad6..fea772e66e62 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; break; default: + ret = -EINVAL; goto err_out; } -- cgit v1.2.3 From cfb61b5e3e09f8b49bc4d685429df75f45127adc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 15 Mar 2018 14:18:00 -0700 Subject: sparc64: Fix regression in pmdp_invalidate(). pmdp_invalidate() was changed to update the pmd atomically (to not lose dirty/access bits) and return the original pmd value. However, in doing so, we lost a lot of the essential work that set_pmd_at() does, namely to update hugepage mapping counts and queuing up the batched TLB flush entry. Thus we were not flushing entries out of the TLB when making such PMD changes. Fix this by abstracting the accounting work of set_pmd_at() out into a separate function, and call it from pmdp_establish(). Fixes: a8e654f01cb7 ("sparc64: update pmdp_invalidate() to return old pmd value") Signed-off-by: David S. Miller --- arch/sparc/mm/tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 847ddffbf38a..b5cfab711651 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte_unmap(pte); } -void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - pmd_t orig = *pmdp; - - *pmdp = pmd; +static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr, + pmd_t orig, pmd_t pmd) +{ if (mm == &init_mm) return; @@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; + __set_pmd_acct(mm, addr, orig, pmd); +} + static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { @@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, do { old = *pmdp; } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + __set_pmd_acct(vma->vm_mm, address, old, pmd); return old; } -- cgit v1.2.3 From 95dd77580ccd66a0da96e6d4696945b8cea39431 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Mar 2018 18:20:29 -0500 Subject: fs: Teach path_connected to handle nfs filesystems with multiple roots. On nfsv2 and nfsv3 the nfs server can export subsets of the same filesystem and report the same filesystem identifier, so that the nfs client can know they are the same filesystem. The subsets can be from disjoint directory trees. The nfsv2 and nfsv3 filesystems provides no way to find the common root of all directory trees exported form the server with the same filesystem identifier. The practical result is that in struct super s_root for nfs s_root is not necessarily the root of the filesystem. The nfs mount code sets s_root to the root of the first subset of the nfs filesystem that the kernel mounts. This effects the dcache invalidation code in generic_shutdown_super currently called shrunk_dcache_for_umount and that code for years has gone through an additional list of dentries that might be dentry trees that need to be freed to accomodate nfs. When I wrote path_connected I did not realize nfs was so special, and it's hueristic for avoiding calling is_subdir can fail. The practical case where this fails is when there is a move of a directory from the subtree exposed by one nfs mount to the subtree exposed by another nfs mount. This move can happen either locally or remotely. With the remote case requiring that the move directory be cached before the move and that after the move someone walks the path to where the move directory now exists and in so doing causes the already cached directory to be moved in the dcache through the magic of d_splice_alias. If someone whose working directory is in the move directory or a subdirectory and now starts calling .. from the initial mount of nfs (where s_root == mnt_root), then path_connected as a heuristic will not bother with the is_subdir check. As s_root really is not the root of the nfs filesystem this heuristic is wrong, and the path may actually not be connected and path_connected can fail. The is_subdir function might be cheap enough that we can call it unconditionally. Verifying that will take some benchmarking and the result may not be the same on all kernels this fix needs to be backported to. So I am avoiding that for now. Filesystems with snapshots such as nilfs and btrfs do something similar. But as the directory tree of the snapshots are disjoint from one another and from the main directory tree rename won't move things between them and this problem will not occur. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 397d425dc26d ("vfs: Test for and handle paths that are unreachable from their mnt_root") Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro --- fs/namei.c | 5 +++-- fs/nfs/super.c | 2 ++ include/linux/fs.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 921ae32dbc80..cafa365eeb70 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd) static bool path_connected(const struct path *path) { struct vfsmount *mnt = path->mnt; + struct super_block *sb = mnt->mnt_sb; - /* Only bind mounts can have disconnected paths */ - if (mnt->mnt_root == mnt->mnt_sb->s_root) + /* Bind mounts and multi-root filesystems can have disconnected paths */ + if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root)) return true; return is_subdir(path->dentry, mnt->mnt_root); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 29bacdc56f6a..5e470e233c83 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, /* initial superblock/root creation */ mount_info->fill_super(s, mount_info); nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); + if (!(server->flags & NFS_MOUNT_UNSHARED)) + s->s_iflags |= SB_I_MULTIROOT; } mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a815560fda0..0430e03febaa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1317,6 +1317,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -- cgit v1.2.3 From da5e45e619b3f101420c38b3006a9ae4f3ad19b0 Mon Sep 17 00:00:00 2001 From: Māris Nartišs Date: Fri, 16 Mar 2018 11:38:43 +1000 Subject: drm/nouveau/mmu: ALIGN_DOWN correct variable Commit 7110c89bb8852ff8b0f88ce05b332b3fe22bd11e ("mmu: swap out round for ALIGN") replaced two calls to round/rounddown with ALIGN/ALIGN_DOWN, but erroneously applied ALIGN_DOWN to a different variable (addr) and left intended variable (tail) not rounded/ALIGNed. As a result screen corruption, X lockups are observable. An example of kernel log of affected system with NV98 card where it was bisected: nouveau 0000:01:00.0: gr: TRAP_M2MF 00000002 [IN] nouveau 0000:01:00.0: gr: TRAP_M2MF 00320951 400007c0 00000000 04000000 nouveau 0000:01:00.0: gr: 00200000 [] ch 1 [000fbbe000 DRM] subc 4 class 5039 mthd 0100 data 00000000 nouveau 0000:01:00.0: fb: trapped read at 0040000000 on channel 1 [0fbbe000 DRM] engine 00 [PGRAPH] client 03 [DISPATCH] subclient 04 [M2M_IN] reason 00000006 [NULL_DMAOBJ] Fixes bug 105173 ("[MCP79][Regression] Unhandled NULL pointer dereference in nvkm_object_unmap since kernel 4.15") https://bugs.freedesktop.org/show_bug.cgi?id=105173 Fixes: 7110c89bb885 ("mmu: swap out round for ALIGN ") Tested-by: Pierre Moreau Reviewed-by: Pierre Moreau Signed-off-by: Maris Nartiss Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org # v4.15+ --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 93946dcee319..1c12e58f44c2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -1354,7 +1354,7 @@ nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse, tail = this->addr + this->size; if (vmm->func->page_block && next && next->page != p) - tail = ALIGN_DOWN(addr, vmm->func->page_block); + tail = ALIGN_DOWN(tail, vmm->func->page_block); if (addr <= tail && tail - addr >= size) { rb_erase(&this->tree, &vmm->free); -- cgit v1.2.3 From 76f2e2bc627f7d08360ac731b6277d744d4eb599 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 17 Feb 2018 13:40:23 +0100 Subject: drm/nouveau/bl: Fix oops on driver unbind Unbinding nouveau on a dual GPU MacBook Pro oopses because we iterate over the bl_connectors list in nouveau_backlight_exit() but skipped initializing it in nouveau_backlight_init(). Stacktrace for posterity: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: nouveau_backlight_exit+0x2b/0x70 [nouveau] nouveau_display_destroy+0x29/0x80 [nouveau] nouveau_drm_unload+0x65/0xe0 [nouveau] drm_dev_unregister+0x3c/0xe0 [drm] drm_put_dev+0x2e/0x60 [drm] nouveau_drm_device_remove+0x47/0x70 [nouveau] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x157/0x220 driver_detach+0x39/0x70 bus_remove_driver+0x51/0xd0 pci_unregister_driver+0x2a/0xa0 nouveau_drm_exit+0x15/0xfb0 [nouveau] SyS_delete_module+0x18c/0x290 system_call_fast_compare_end+0xc/0x6f Fixes: b53ac1ee12a3 ("drm/nouveau/bl: Do not register interface if Apple GMUX detected") Cc: stable@vger.kernel.org # v4.10+ Cc: Pierre Moreau Signed-off-by: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 380f340204e8..f56f60f695e1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -268,13 +268,13 @@ nouveau_backlight_init(struct drm_device *dev) struct nvif_device *device = &drm->client.device; struct drm_connector *connector; + INIT_LIST_HEAD(&drm->bl_connectors); + if (apple_gmux_present()) { NV_INFO(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n"); return 0; } - INIT_LIST_HEAD(&drm->bl_connectors); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && connector->connector_type != DRM_MODE_CONNECTOR_eDP) -- cgit v1.2.3 From 9e75dc61eaa9acd1bff83c3b814ac2af6dc1f64c Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 19 Feb 2018 17:09:45 +0100 Subject: drm/nouveau/bl: fix backlight regression Fixes: 3c66c87dc9 ("drm/nouveau/disp: remove hw-specific customisation of output paths") Suggested-by: Ben Skeggs Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index f56f60f695e1..debbbf0fd4bd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -134,7 +134,7 @@ nv50_get_intensity(struct backlight_device *bd) struct nouveau_encoder *nv_encoder = bl_get_data(bd); struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; - int or = nv_encoder->or; + int or = ffs(nv_encoder->dcb->or) - 1; u32 div = 1025; u32 val; @@ -149,7 +149,7 @@ nv50_set_intensity(struct backlight_device *bd) struct nouveau_encoder *nv_encoder = bl_get_data(bd); struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; - int or = nv_encoder->or; + int or = ffs(nv_encoder->dcb->or) - 1; u32 div = 1025; u32 val = (bd->props.brightness * div) / 100; @@ -170,7 +170,7 @@ nva3_get_intensity(struct backlight_device *bd) struct nouveau_encoder *nv_encoder = bl_get_data(bd); struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; - int or = nv_encoder->or; + int or = ffs(nv_encoder->dcb->or) - 1; u32 div, val; div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or)); @@ -188,7 +188,7 @@ nva3_set_intensity(struct backlight_device *bd) struct nouveau_encoder *nv_encoder = bl_get_data(bd); struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; - int or = nv_encoder->or; + int or = ffs(nv_encoder->dcb->or) - 1; u32 div, val; div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or)); @@ -228,7 +228,7 @@ nv50_backlight_init(struct drm_connector *connector) return -ENODEV; } - if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or))) + if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1))) return 0; if (drm->client.device.info.chipset <= 0xa0 || -- cgit v1.2.3 From 2975d5de6428ff6d9317e9948f0968f7d42e5d74 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Mar 2018 15:33:02 +0200 Subject: RDMA/ucma: Check AF family prior resolving address Garbage supplied by user will cause to UCMA module provide zero memory size for memcpy(), because it wasn't checked, it will produce unpredictable results in rdma_resolve_addr(). [ 42.873814] BUG: KASAN: null-ptr-deref in rdma_resolve_addr+0xc8/0xfb0 [ 42.874816] Write of size 28 at addr 00000000000000a0 by task resaddr/1044 [ 42.876765] [ 42.876960] CPU: 1 PID: 1044 Comm: resaddr Not tainted 4.16.0-rc1-00057-gaa56a5293d7e #34 [ 42.877840] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 42.879691] Call Trace: [ 42.880236] dump_stack+0x5c/0x77 [ 42.880664] kasan_report+0x163/0x380 [ 42.881354] ? rdma_resolve_addr+0xc8/0xfb0 [ 42.881864] memcpy+0x34/0x50 [ 42.882692] rdma_resolve_addr+0xc8/0xfb0 [ 42.883366] ? deref_stack_reg+0x88/0xd0 [ 42.883856] ? vsnprintf+0x31a/0x770 [ 42.884686] ? rdma_bind_addr+0xc40/0xc40 [ 42.885327] ? num_to_str+0x130/0x130 [ 42.885773] ? deref_stack_reg+0x88/0xd0 [ 42.886217] ? __read_once_size_nocheck.constprop.6+0x10/0x10 [ 42.887698] ? unwind_get_return_address_ptr+0x50/0x50 [ 42.888302] ? replace_slot+0x147/0x170 [ 42.889176] ? delete_node+0x12c/0x340 [ 42.890223] ? __radix_tree_lookup+0xa9/0x160 [ 42.891196] ? ucma_resolve_ip+0xb7/0x110 [ 42.891917] ucma_resolve_ip+0xb7/0x110 [ 42.893003] ? ucma_resolve_addr+0x190/0x190 [ 42.893531] ? _copy_from_user+0x5e/0x90 [ 42.894204] ucma_write+0x174/0x1f0 [ 42.895162] ? ucma_resolve_route+0xf0/0xf0 [ 42.896309] ? dequeue_task_fair+0x67e/0xd90 [ 42.897192] ? put_prev_entity+0x7d/0x170 [ 42.897870] ? ring_buffer_record_is_on+0xd/0x20 [ 42.898439] ? tracing_record_taskinfo_skip+0x20/0x50 [ 42.899686] __vfs_write+0xc4/0x350 [ 42.900142] ? kernel_read+0xa0/0xa0 [ 42.900602] ? firmware_map_remove+0xdf/0xdf [ 42.901135] ? do_task_dead+0x5d/0x60 [ 42.901598] ? do_exit+0xcc6/0x1220 [ 42.902789] ? __fget+0xa8/0xf0 [ 42.903190] vfs_write+0xf7/0x280 [ 42.903600] SyS_write+0xa1/0x120 [ 42.904206] ? SyS_read+0x120/0x120 [ 42.905710] ? compat_start_thread+0x60/0x60 [ 42.906423] ? SyS_read+0x120/0x120 [ 42.908716] do_syscall_64+0xeb/0x250 [ 42.910760] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 42.912735] RIP: 0033:0x7f138b0afe99 [ 42.914734] RSP: 002b:00007f138b799e98 EFLAGS: 00000287 ORIG_RAX: 0000000000000001 [ 42.917134] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f138b0afe99 [ 42.919487] RDX: 000000000000002e RSI: 0000000020000c40 RDI: 0000000000000004 [ 42.922393] RBP: 00007f138b799ec0 R08: 00007f138b79a700 R09: 0000000000000000 [ 42.925266] R10: 00007f138b79a700 R11: 0000000000000287 R12: 00007f138b799fc0 [ 42.927570] R13: 0000000000000000 R14: 00007ffdbae757c0 R15: 00007f138b79a9c0 [ 42.930047] [ 42.932681] Disabling lock debugging due to kernel taint [ 42.934795] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 [ 42.936939] IP: memcpy_erms+0x6/0x10 [ 42.938864] PGD 80000001bea92067 P4D 80000001bea92067 PUD 1bea96067 PMD 0 [ 42.941576] Oops: 0002 [#1] SMP KASAN PTI [ 42.943952] CPU: 1 PID: 1044 Comm: resaddr Tainted: G B 4.16.0-rc1-00057-gaa56a5293d7e #34 [ 42.946964] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 42.952336] RIP: 0010:memcpy_erms+0x6/0x10 [ 42.954707] RSP: 0018:ffff8801c8b479c8 EFLAGS: 00010286 [ 42.957227] RAX: 00000000000000a0 RBX: ffff8801c8b47ba0 RCX: 000000000000001c [ 42.960543] RDX: 000000000000001c RSI: ffff8801c8b47bbc RDI: 00000000000000a0 [ 42.963867] RBP: ffff8801c8b47b60 R08: 0000000000000000 R09: ffffed0039168ed1 [ 42.967303] R10: 0000000000000001 R11: ffffed0039168ed0 R12: ffff8801c8b47bbc [ 42.970685] R13: 00000000000000a0 R14: 1ffff10039168f4a R15: 0000000000000000 [ 42.973631] FS: 00007f138b79a700(0000) GS:ffff8801e5d00000(0000) knlGS:0000000000000000 [ 42.976831] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.979239] CR2: 00000000000000a0 CR3: 00000001be908002 CR4: 00000000003606a0 [ 42.982060] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 42.984877] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 42.988033] Call Trace: [ 42.990487] rdma_resolve_addr+0xc8/0xfb0 [ 42.993202] ? deref_stack_reg+0x88/0xd0 [ 42.996055] ? vsnprintf+0x31a/0x770 [ 42.998707] ? rdma_bind_addr+0xc40/0xc40 [ 43.000985] ? num_to_str+0x130/0x130 [ 43.003410] ? deref_stack_reg+0x88/0xd0 [ 43.006302] ? __read_once_size_nocheck.constprop.6+0x10/0x10 [ 43.008780] ? unwind_get_return_address_ptr+0x50/0x50 [ 43.011178] ? replace_slot+0x147/0x170 [ 43.013517] ? delete_node+0x12c/0x340 [ 43.016019] ? __radix_tree_lookup+0xa9/0x160 [ 43.018755] ? ucma_resolve_ip+0xb7/0x110 [ 43.021270] ucma_resolve_ip+0xb7/0x110 [ 43.023968] ? ucma_resolve_addr+0x190/0x190 [ 43.026312] ? _copy_from_user+0x5e/0x90 [ 43.029384] ucma_write+0x174/0x1f0 [ 43.031861] ? ucma_resolve_route+0xf0/0xf0 [ 43.034782] ? dequeue_task_fair+0x67e/0xd90 [ 43.037483] ? put_prev_entity+0x7d/0x170 [ 43.040215] ? ring_buffer_record_is_on+0xd/0x20 [ 43.042990] ? tracing_record_taskinfo_skip+0x20/0x50 [ 43.045595] __vfs_write+0xc4/0x350 [ 43.048624] ? kernel_read+0xa0/0xa0 [ 43.051604] ? firmware_map_remove+0xdf/0xdf [ 43.055379] ? do_task_dead+0x5d/0x60 [ 43.058000] ? do_exit+0xcc6/0x1220 [ 43.060783] ? __fget+0xa8/0xf0 [ 43.063133] vfs_write+0xf7/0x280 [ 43.065677] SyS_write+0xa1/0x120 [ 43.068647] ? SyS_read+0x120/0x120 [ 43.071179] ? compat_start_thread+0x60/0x60 [ 43.074025] ? SyS_read+0x120/0x120 [ 43.076705] do_syscall_64+0xeb/0x250 [ 43.079006] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 43.081606] RIP: 0033:0x7f138b0afe99 [ 43.083679] RSP: 002b:00007f138b799e98 EFLAGS: 00000287 ORIG_RAX: 0000000000000001 [ 43.086802] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f138b0afe99 [ 43.089989] RDX: 000000000000002e RSI: 0000000020000c40 RDI: 0000000000000004 [ 43.092866] RBP: 00007f138b799ec0 R08: 00007f138b79a700 R09: 0000000000000000 [ 43.096233] R10: 00007f138b79a700 R11: 0000000000000287 R12: 00007f138b799fc0 [ 43.098913] R13: 0000000000000000 R14: 00007ffdbae757c0 R15: 00007f138b79a9c0 [ 43.101809] Code: 90 90 90 90 90 eb 1e 0f 1f 00 48 89 f8 48 89 d1 48 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48 89 d1 a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 [ 43.107950] RIP: memcpy_erms+0x6/0x10 RSP: ffff8801c8b479c8 Reported-by: Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Signed-off-by: Leon Romanovsky Reviewed-by: Sean Hefty Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 699a46d4c0c5..3f3f7bbc4528 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -664,19 +664,23 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, int in_len, int out_len) { struct rdma_ucm_resolve_ip cmd; + struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + src = (struct sockaddr *) &cmd.src_addr; + dst = (struct sockaddr *) &cmd.dst_addr; + if (!rdma_addr_size(src) || !rdma_addr_size(dst)) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, - (struct sockaddr *) &cmd.dst_addr, - cmd.timeout_ms); + ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } -- cgit v1.2.3 From dc9e0a9347e932e3fd3cd03e7ff241022ed6ea8a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 15 Mar 2018 19:49:14 -0700 Subject: acpi, numa: fix pxm to online numa node associations Commit 99759869faf1 "acpi: Add acpi_map_pxm_to_online_node()" added support for mapping a given proximity to its nearest, by SLIT distance, online node. However, it sometimes returns unexpected results due to the fact that it switches from comparing the PXM node to the last node that was closer than the current max. for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; node = n; <---- from this point we're using the wrong node for node_distance() Fixes: 99759869faf1 ("acpi: Add acpi_map_pxm_to_online_node()") Cc: Reviewed-by: Toshi Kani Acked-by: Rafael J. Wysocki > Signed-off-by: Dan Williams --- drivers/acpi/numa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 8ccaae3550d2..85167603b9c9 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -103,25 +103,27 @@ int acpi_map_pxm_to_node(int pxm) */ int acpi_map_pxm_to_online_node(int pxm) { - int node, n, dist, min_dist; + int node, min_node; node = acpi_map_pxm_to_node(pxm); if (node == NUMA_NO_NODE) node = 0; + min_node = node; if (!node_online(node)) { - min_dist = INT_MAX; + int min_dist = INT_MAX, dist, n; + for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; - node = n; + min_node = n; } } } - return node; + return min_node; } EXPORT_SYMBOL(acpi_map_pxm_to_online_node); -- cgit v1.2.3 From 47b7de2f6c18f75d1f2716efe752cba43f32a626 Mon Sep 17 00:00:00 2001 From: Evgeniy Didin Date: Wed, 14 Mar 2018 22:30:51 +0300 Subject: mmc: dw_mmc: fix falling from idmac to PIO mode when dw_mci_reset occurs It was found that in IDMAC mode after soft-reset driver switches to PIO mode. That's what happens in case of DTO timeout overflow calculation failure: 1. soft-reset is called 2. driver restarts dma 3. descriptors states are checked, one of descriptor is owned by the IDMAC. 4. driver can't use DMA and then switches to PIO mode. Failure was already fixed in: https://www.spinics.net/lists/linux-mmc/msg48125.html. Behaviour while soft-reset is not something we except or even want to happen. So we switch from dw_mci_idmac_reset to dw_mci_idmac_init, so descriptors are cleaned before starting dma. And while at it explicitly zero des0 which otherwise might contain garbage as being allocated by dmam_alloc_coherent(). Signed-off-by: Evgeniy Didin Cc: Jaehoon Chung Cc: Ulf Hansson Cc: Andy Shevchenko Cc: Jisheng Zhang Cc: Shawn Lin Cc: Alexey Brodkin Cc: Eugeniy Paltsev Cc: linux-snps-arc@lists.infradead.org Cc: # 4.4+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 545550591389..06d47414d0c1 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -564,6 +564,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) (sizeof(struct idmac_desc_64addr) * (i + 1))) >> 32; /* Initialize reserved and buffer size fields to "0" */ + p->des0 = 0; p->des1 = 0; p->des2 = 0; p->des3 = 0; @@ -586,6 +587,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) i++, p++) { p->des3 = cpu_to_le32(host->sg_dma + (sizeof(struct idmac_desc) * (i + 1))); + p->des0 = 0; p->des1 = 0; } @@ -1801,8 +1803,8 @@ static bool dw_mci_reset(struct dw_mci *host) } if (host->use_dma == TRANS_MODE_IDMAC) - /* It is also recommended that we reset and reprogram idmac */ - dw_mci_idmac_reset(host); + /* It is also required that we reinit idmac */ + dw_mci_idmac_init(host); ret = true; -- cgit v1.2.3 From e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 Mon Sep 17 00:00:00 2001 From: Alexander Sergeyev Date: Tue, 13 Mar 2018 22:38:56 +0300 Subject: x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist In accordance with Intel's microcode revision guidance from March 6 MCU rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors that share CPUID 506E3. Signed-off-by: Alexander Sergeyev Signed-off-by: Thomas Gleixner Cc: Jia Zhang Cc: Greg Kroah-Hartman Cc: Kyle Huey Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain --- arch/x86/kernel/cpu/intel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4aa9fd379390..c3af167d0a70 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release @@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, -- cgit v1.2.3 From 18ffc0cce4ff947a2acc9b2e06ae5309a6e6fb43 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 22 Feb 2018 15:19:37 +0100 Subject: microblaze: Setup dependencies for ASM optimized lib functions The patch: "microblaze: Setup proper dependency for optimized lib functions" (sha1: 7b6ce52be3f86520524711a6f33f3866f9339694) didn't setup all dependencies properly. Optimized lib functions in C are also present for little endian and optimized library functions in assembler are implemented only for big endian version. Reported-by: kbuild test robot Signed-off-by: Michal Simek --- arch/microblaze/Kconfig.platform | 2 +- arch/microblaze/lib/fastcopy.S | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 6996f397c16c..f7f1739c11b9 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -8,7 +8,6 @@ menu "Platform options" config OPT_LIB_FUNCTION bool "Optimalized lib function" - depends on CPU_LITTLE_ENDIAN default y help Allows turn on optimalized library function (memcpy and memmove). @@ -21,6 +20,7 @@ config OPT_LIB_FUNCTION config OPT_LIB_ASM bool "Optimalized lib function ASM" depends on OPT_LIB_FUNCTION && (XILINX_MICROBLAZE0_USE_BARREL = 1) + depends on CPU_BIG_ENDIAN default n help Allows turn on optimalized library function (memcpy and memmove). diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S index 62021d7e249e..fdc48bb065d8 100644 --- a/arch/microblaze/lib/fastcopy.S +++ b/arch/microblaze/lib/fastcopy.S @@ -29,10 +29,6 @@ * between mem locations with size of xfer spec'd in bytes */ -#ifdef __MICROBLAZEEL__ -#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM. -#endif - #include .text .globl memcpy -- cgit v1.2.3 From cd4dfee6a8bfbbe404e9905aff85e267ec99f5fa Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 9 Mar 2018 09:52:55 -0600 Subject: microblaze: remove unused alloc_maybe_bootmem alloc_maybe_bootmem is unused, so remove it. Signed-off-by: Rob Herring Signed-off-by: Michal Simek --- arch/microblaze/include/asm/setup.h | 1 - arch/microblaze/mm/init.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index be84a4d3917f..7c968c1d1729 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -44,7 +44,6 @@ void machine_shutdown(void); void machine_halt(void); void machine_power_off(void); -extern void *alloc_maybe_bootmem(size_t size, gfp_t mask); extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); # endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 434639f9a3a6..5bc9c7fbb541 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -414,14 +414,6 @@ void __init *early_get_page(void) #endif /* CONFIG_MMU */ -void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask) -{ - if (mem_init_done) - return kmalloc(size, mask); - else - return alloc_bootmem(size); -} - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; -- cgit v1.2.3 From 101646a24a2f9cdb61d7732459fbf068a7bbb542 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 9 Mar 2018 09:54:07 -0600 Subject: microblaze: switch to NO_BOOTMEM Microblaze doesn't set CONFIG_NO_BOOTMEM and so memblock_virt_alloc() doesn't work for CONFIG_HAVE_MEMBLOCK && !CONFIG_NO_BOOTMEM. Similar change was already done by others architectures "ARM: mm: Remove bootmem code and switch to NO_BOOTMEM" (sha1: 84f452b1e8fc73ac0e31254c66e3e2260ce5263d) or "openrisc: Consolidate setup to use memblock instead of bootmem" (sha1: 266c7fad157265bb54d17db1c9545f2aaa488643) or "parisc: Drop bootmem and switch to memblock" (sha1: 4fe9e1d957e45ad8eba9885ee860a0e93d13a7c7) or "powerpc: Remove bootmem allocator" (sha1: 10239733ee8617bac3f1c1769af43a88ed979324) or "s390/mm: Convert bootmem to memblock" (sha1: 50be634507284eea38df78154d22615d21200b42) or "sparc64: Convert over to NO_BOOTMEM." (sha1: 625d693e9784f988371e69c2b41a2172c0be6c11) or "xtensa: drop sysmem and switch to memblock" (sha1: 0e46c1115f5816949220d62dd3ff04aa68e7ac6b) Issue was introduced by: "of/fdt: use memblock_virt_alloc for early alloc" (sha1: 0fa1c579349fdd90173381712ad78aa99c09d38b) Signed-off-by: Rob Herring Tested-by: Alvaro Gamez Machado Tested-by: Michal Simek Signed-off-by: Michal Simek --- arch/microblaze/Kconfig | 1 + arch/microblaze/mm/init.c | 56 +++++------------------------------------------ 2 files changed, 7 insertions(+), 50 deletions(-) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4f798aa671dd..3817a3e2146c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -24,6 +24,7 @@ config MICROBLAZE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select NO_BOOTMEM select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 5bc9c7fbb541..df6de7ccdc2e 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -32,9 +32,6 @@ int mem_init_done; #ifndef CONFIG_MMU unsigned int __page_offset; EXPORT_SYMBOL(__page_offset); - -#else -static int init_bootmem_done; #endif /* CONFIG_MMU */ char *klimit = _end; @@ -117,7 +114,6 @@ static void __init paging_init(void) void __init setup_memory(void) { - unsigned long map_size; struct memblock_region *reg; #ifndef CONFIG_MMU @@ -174,17 +170,6 @@ void __init setup_memory(void) pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); - /* - * Find an area to use for the bootmem bitmap. - * We look for the first area which is at least - * 128kB in length (128kB is enough for a bitmap - * for 4GB of memory, using 4kB pages), plus 1 page - * (in case the address isn't page-aligned). - */ - map_size = init_bootmem_node(NODE_DATA(0), - PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn); - memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size); - /* Add active regions with valid PFNs */ for_each_memblock(memory, reg) { unsigned long start_pfn, end_pfn; @@ -196,32 +181,9 @@ void __init setup_memory(void) &memblock.memory, 0); } - /* free bootmem is whole main memory */ - free_bootmem_with_active_regions(0, max_low_pfn); - - /* reserve allocate blocks */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - - pr_debug("reserved - 0x%08x-0x%08x, %lx, %lx\n", - (u32) reg->base, (u32) reg->size, top, - memory_start + lowmem_size - 1); - - if (top <= (memory_start + lowmem_size - 1)) { - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - } else if (reg->base < (memory_start + lowmem_size - 1)) { - unsigned long trunc_size = memory_start + lowmem_size - - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } - /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); -#ifdef CONFIG_MMU - init_bootmem_done = 1; -#endif paging_init(); } @@ -398,18 +360,12 @@ asmlinkage void __init mmu_init(void) /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - /* - * Mem start + kernel_tlb -> here is limit - * because of mem mapping from head.S - */ - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - memory_start + kernel_tlb)); - } - return p; + /* + * Mem start + kernel_tlb -> here is limit + * because of mem mapping from head.S + */ + return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, + memory_start + kernel_tlb)); } #endif /* CONFIG_MMU */ -- cgit v1.2.3 From daaf216c06fba4ee4dc3f62715667da929d68774 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 8 Mar 2018 17:17:31 -0600 Subject: KVM: x86: Fix device passthrough when SME is active When using device passthrough with SME active, the MMIO range that is mapped for the device should not be mapped encrypted. Add a check in set_spte() to insure that a page is not mapped encrypted if that page is a device MMIO page as indicated by kvm_is_mmio_pfn(). Cc: # 4.14.x- Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f551962ac294..763bb3bade63 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, else pte_access &= ~ACC_WRITE_MASK; + if (!kvm_is_mmio_pfn(pfn)) + spte |= shadow_me_mask; + spte |= (u64)pfn << PAGE_SHIFT; - spte |= shadow_me_mask; if (pte_access & ACC_WRITE_MASK) { -- cgit v1.2.3 From 093e037ca88e1767693bc6bcb2df3f49c6be68c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Mar 2018 14:31:43 +0100 Subject: Revert "btrfs: use proper endianness accessors for super_copy" This reverts commit 3c181c12c431fe33b669410d663beb9cceefcd1b. The offending patch was merged in 4.16-rc4 and was promptly applied to stable kernels 4.14.25 and 4.15.8. The patch causes a corruption in several superblock items on big-endian machines because of messed up endianity conversions. The damage is manually repairable. A filesystem cannot be mounted again after it has been unmounted once. We do a full revert and not a fixup so stable can pick that patch ASAP. Fixes: 3c181c12c431 ("btrfs: use proper endianness accessors for super_copy") Link: https://lkml.kernel.org/r/1521139304@msgid.manchmal.in-ulm.de CC: stable@vger.kernel.org # 4.14+ Reported-by: Christoph Biedl Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 8 +++++--- fs/btrfs/transaction.c | 20 ++++++++------------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index d11c70bff5a9..a8bafed931f4 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -423,7 +423,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->nodesize); + return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); } BTRFS_ATTR(, nodesize, btrfs_nodesize_show); @@ -433,7 +433,8 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", + fs_info->super_copy->sectorsize); } BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show); @@ -443,7 +444,8 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", + fs_info->super_copy->sectorsize); } BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9220f004001c..04f07144b45c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1722,23 +1722,19 @@ static void update_super_roots(struct btrfs_fs_info *fs_info) super = fs_info->super_copy; - /* update latest btrfs_super_block::chunk_root refs */ root_item = &fs_info->chunk_root->root_item; - btrfs_set_super_chunk_root(super, root_item->bytenr); - btrfs_set_super_chunk_root_generation(super, root_item->generation); - btrfs_set_super_chunk_root_level(super, root_item->level); + super->chunk_root = root_item->bytenr; + super->chunk_root_generation = root_item->generation; + super->chunk_root_level = root_item->level; - /* update latest btrfs_super_block::root refs */ root_item = &fs_info->tree_root->root_item; - btrfs_set_super_root(super, root_item->bytenr); - btrfs_set_super_generation(super, root_item->generation); - btrfs_set_super_root_level(super, root_item->level); - + super->root = root_item->bytenr; + super->generation = root_item->generation; + super->root_level = root_item->level; if (btrfs_test_opt(fs_info, SPACE_CACHE)) - btrfs_set_super_cache_generation(super, root_item->generation); + super->cache_generation = root_item->generation; if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags)) - btrfs_set_super_uuid_tree_generation(super, - root_item->generation); + super->uuid_tree_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) -- cgit v1.2.3 From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:27 +0900 Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off When we have a bridge with vlan_filtering on and a vlan device on top of it, packets would be corrupted in skb_vlan_untag() called from br_dev_xmit(). The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(), which makes use of skb->mac_len. In this function mac_len is meant for handling rx path with vlan devices with reorder_header disabled, but in tx path mac_len is typically 0 and cannot be used, which is the problem in this case. The current code even does not properly handle rx path (skb_vlan_untag() called from __netif_receive_skb_core()) with reorder_header off actually. In rx path single tag case, it works as follows: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+------+---- | ETH | VLAN | ETH | | ADDRS | TPID | TCI | TYPE | +-------------------+-------------+------+---- <-------- mac_len ---------> <-------------> to be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <-------- mac_len ---------> This is ok, but in rx double tag case, it corrupts packets: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+-------------+------+---- | ETH | VLAN | VLAN | ETH | | ADDRS | TPID | TCI | TPID | TCI | TYPE | +-------------------+-------------+-------------+------+---- <--------------- mac_len ----------------> <-------------> should be removed <---------------------------> actually will be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <--------------- mac_len ----------------> So, two of vlan tags are both removed while only inner one should be removed and mac_header (and mac_len) is broken. skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2), so use skb->data and skb->mac_header to calculate the right offset. Reported-by: Brandon Carpenter Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 8bbbcb5cd94b..820de5d222d2 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -30,6 +30,7 @@ */ #define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_TLEN 2 /* Octets in ethernet type field */ #define ETH_HLEN 14 /* Total octets in header. */ #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ #define ETH_DATA_LEN 1500 /* Max. octets in payload */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index baf990528943..b103f46ec512 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { + int mac_len; + if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, - 2 * ETH_ALEN); + mac_len = skb->data - skb_mac_header(skb); + memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), + mac_len - VLAN_HLEN - ETH_TLEN); skb->mac_header += VLAN_HLEN; return skb; } -- cgit v1.2.3 From cbe7128c4b92e2004984f477fd38dfa81662f02e Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:28 +0900 Subject: vlan: Fix out of order vlan headers with reorder header off With reorder header off, received packets are untagged in skb_vlan_untag() called from within __netif_receive_skb_core(), and later the tag will be inserted back in vlan_do_receive(). This caused out of order vlan headers when we create a vlan device on top of another vlan device, because vlan_do_receive() inserts a tag as the outermost vlan tag. E.g. the outer tag is first removed in skb_vlan_untag() and inserted back in vlan_do_receive(), then the inner tag is next removed and inserted back as the outermost tag. This patch fixes the behaviour by inserting the inner tag at the right position. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 66 ++++++++++++++++++++++++++++++++++++++++--------- net/8021q/vlan_core.c | 4 +-- 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 5e6a2d4dc366..c4a1cff9c768 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, } /** - * __vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers * - * Inserts the VLAN tag into @skb as part of the payload + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns error if skb_cow_head failes. * * Does not change skb->protocol so this function can be used during receive. */ -static inline int __vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline int __vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci, + unsigned int mac_len) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; - veth = skb_push(skb, VLAN_HLEN); + skb_push(skb, VLAN_HLEN); - /* Move the mac addresses to the beginning of the new header. */ - memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); + /* Move the mac header sans proto to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; + veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); + /* first, the ethernet type */ veth->h_vlan_proto = vlan_proto; @@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, } /** - * vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload + * Returns error if skb_cow_head failes. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline int __vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + +/** + * vlan_insert_inner_tag - inner VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers + * + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. * * Following the skb_unshare() example, in case of error, the calling function @@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, + u16 vlan_tci, + unsigned int mac_len) { int err; - err = __vlan_insert_tag(skb, vlan_proto, vlan_tci); + err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len); if (err) { dev_kfree_skb_any(skb); return NULL; @@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, return skb; } +/** + * vlan_insert_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + /** * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 64aa9f755e1d..45c9bf5ff3a0 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp) * original position later */ skb_push(skb, offset); - skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto, - skb->vlan_tci); + skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto, + skb->vlan_tci, skb->mac_len); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); -- cgit v1.2.3 From 2cc683e88c0c993ac3721d9b702cb0630abe2879 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 13 Mar 2018 12:01:43 -0700 Subject: kcm: lock lower socket in kcm_attach Need to lock lower socket in order to provide mutual exclusion with kcm_unattach. v2: Add Reported-by for syzbot Fixes: ab7ac4eb9832e32a09f4e804 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+ea75c0ffcd353d32515f064aaebefc5279e6161e@syzkaller.appspotmail.com Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index f297d53a11aa..34355fd19f27 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock, .parse_msg = kcm_parse_func_strparser, .read_sock_done = kcm_read_sock_done, }; - int err; + int err = 0; csk = csock->sk; if (!csk) return -EINVAL; + lock_sock(csk); + /* Only allow TCP sockets to be attached for now */ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || - csk->sk_protocol != IPPROTO_TCP) - return -EOPNOTSUPP; + csk->sk_protocol != IPPROTO_TCP) { + err = -EOPNOTSUPP; + goto out; + } /* Don't allow listeners or closed sockets */ - if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) - return -EOPNOTSUPP; + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) { + err = -EOPNOTSUPP; + goto out; + } psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); - if (!psock) - return -ENOMEM; + if (!psock) { + err = -ENOMEM; + goto out; + } psock->mux = mux; psock->sk = csk; @@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, err = strp_init(&psock->strp, csk, &cb); if (err) { kmem_cache_free(kcm_psockp, psock); - return err; + goto out; } write_lock_bh(&csk->sk_callback_lock); @@ -1419,7 +1427,8 @@ static int kcm_attach(struct socket *sock, struct socket *csock, write_unlock_bh(&csk->sk_callback_lock); strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); - return -EALREADY; + err = -EALREADY; + goto out; } psock->save_data_ready = csk->sk_data_ready; @@ -1455,7 +1464,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock, /* Schedule RX work in case there are already bytes queued */ strp_check_rcv(&psock->strp); - return 0; +out: + release_sock(csk); + + return err; } static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) @@ -1507,6 +1519,7 @@ static void kcm_unattach(struct kcm_psock *psock) if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); + release_sock(csk); return; } -- cgit v1.2.3 From 484d802d0f2f29c335563fcac2a8facf174a1bbc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 13 Mar 2018 14:45:07 -0700 Subject: net: systemport: Rewrite __bcm_sysport_tx_reclaim() There is no need for complex checking between the last consumed index and current consumed index, a simple subtraction will do. This also eliminates the possibility of a permanent transmit queue stall under the following conditions: - one CPU bursts ring->size worth of traffic (up to 256 buffers), to the point where we run out of free descriptors, so we stop the transmit queue at the end of bcm_sysport_xmit() - because of our locking, we have the transmit process disable interrupts which means we can be blocking the TX reclamation process - when TX reclamation finally runs, we will be computing the difference between ring->c_index (last consumed index by SW) and what the HW reports through its register - this register is masked with (ring->size - 1) = 0xff, which will lead to stripping the upper bits of the index (register is 16-bits wide) - we will be computing last_tx_cn as 0, which means there is no work to be done, and we never wake-up the transmit queue, leaving it permanently disabled A practical example is e.g: ring->c_index aka last_c_index = 12, we pushed 256 entries, HW consumer index = 268, we mask it with 0xff = 12, so last_tx_cn == 0, nothing happens. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 33 ++++++++++++++---------------- drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f15a8fc6dfc9..3fc549b88c43 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -855,10 +855,12 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring, static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, struct bcm_sysport_tx_ring *ring) { - unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; unsigned int pkts_compl = 0, bytes_compl = 0; struct net_device *ndev = priv->netdev; + unsigned int txbds_processed = 0; struct bcm_sysport_cb *cb; + unsigned int txbds_ready; + unsigned int c_index; u32 hw_ind; /* Clear status before servicing to reduce spurious interrupts */ @@ -871,29 +873,23 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, /* Compute how many descriptors have been processed since last call */ hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; - ring->p_index = (hw_ind & RING_PROD_INDEX_MASK); - - last_c_index = ring->c_index; - num_tx_cbs = ring->size; - - c_index &= (num_tx_cbs - 1); - - if (c_index >= last_c_index) - last_tx_cn = c_index - last_c_index; - else - last_tx_cn = num_tx_cbs - last_c_index + c_index; + txbds_ready = (c_index - ring->c_index) & RING_CONS_INDEX_MASK; netif_dbg(priv, tx_done, ndev, - "ring=%d c_index=%d last_tx_cn=%d last_c_index=%d\n", - ring->index, c_index, last_tx_cn, last_c_index); + "ring=%d old_c_index=%u c_index=%u txbds_ready=%u\n", + ring->index, ring->c_index, c_index, txbds_ready); - while (last_tx_cn-- > 0) { - cb = ring->cbs + last_c_index; + while (txbds_processed < txbds_ready) { + cb = &ring->cbs[ring->clean_index]; bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl); ring->desc_count++; - last_c_index++; - last_c_index &= (num_tx_cbs - 1); + txbds_processed++; + + if (likely(ring->clean_index < ring->size - 1)) + ring->clean_index++; + else + ring->clean_index = 0; } u64_stats_update_begin(&priv->syncp); @@ -1394,6 +1390,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; + ring->clean_index = 0; ring->alloc_size = ring->size; ring->desc_cpu = p; ring->desc_count = ring->size; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index f5a984c1c986..19c91c76e327 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -706,7 +706,7 @@ struct bcm_sysport_tx_ring { unsigned int desc_count; /* Number of descriptors */ unsigned int curr_desc; /* Current descriptor */ unsigned int c_index; /* Last consumer index */ - unsigned int p_index; /* Current producer index */ + unsigned int clean_index; /* Current clean index */ struct bcm_sysport_cb *cbs; /* Transmit control blocks */ struct dma_desc *desc_cpu; /* CPU view of the descriptor */ struct bcm_sysport_priv *priv; /* private context backpointer */ -- cgit v1.2.3 From fa6a91e9b907231d2e38ea5ed89c537b3525df3d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 13 Mar 2018 16:50:06 +0100 Subject: net/iucv: Free memory obtained by kzalloc Free memory by calling put_device(), if afiucv_iucv_init is not successful. Signed-off-by: Arvind Yadav Reviewed-by: Cornelia Huck Signed-off-by: Ursula Braun Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1e8cc7bcbca3..9e2643ab4ccb 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2433,9 +2433,11 @@ static int afiucv_iucv_init(void) af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) - goto out_driver; + goto out_iucv_dev; return 0; +out_iucv_dev: + put_device(af_iucv_dev); out_driver: driver_unregister(&af_iucv_driver); out_iucv: -- cgit v1.2.3 From 933e8c91b9f5a2f504f6da1f069c410449b9f4b9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:49:27 +0200 Subject: qed: Fix MPA unalign flow in case header is split across two packets. There is a corner case in the MPA unalign flow where a FPDU header is split over two tcp segments. The length of the first fragment in this case was not initialized properly and should be '1' Fixes: c7d1d839 ("qed: Add support for MPA header being split over two tcp packets") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index ca4a81dc1ace..fefe5277f20d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1928,8 +1928,8 @@ qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, /* Missing lower byte is now available */ mpa_len = fpdu->fpdu_length | *mpa_data; fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); - fpdu->mpa_frag_len = fpdu->fpdu_length; /* one byte of hdr */ + fpdu->mpa_frag_len = 1; fpdu->incomplete_bytes = fpdu->fpdu_length - 1; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, -- cgit v1.2.3 From 16da09047d3fb991dc48af41f6d255fd578e8ca2 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:49:28 +0200 Subject: qed: Fix non TCP packets should be dropped on iWARP ll2 connection FW workaround. The iWARP LL2 connection did not expect TCP packets to arrive on it's connection. The fix drops any non-tcp packets Fixes b5c29ca ("qed: iWARP CM - setup a ll2 connection for handling SYN packets") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index fefe5277f20d..d5d02be72947 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1703,6 +1703,13 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen); if (eth_type == ETH_P_IP) { + if (iph->protocol != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return -EINVAL; + } + cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); cm_info->ip_version = TCP_IPV4; @@ -1711,6 +1718,14 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, *payload_len = ntohs(iph->tot_len) - ip_hlen; } else if (eth_type == ETH_P_IPV6) { ip6h = (struct ipv6hdr *)iph; + + if (ip6h->nexthdr != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return -EINVAL; + } + for (i = 0; i < 4; i++) { cm_info->local_ip[i] = ntohl(ip6h->daddr.in6_u.u6_addr32[i]); -- cgit v1.2.3 From 4609adc27175839408359822523de7247d56c87f Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:56:53 +0200 Subject: qede: Fix qedr link update Link updates were not reported to qedr correctly. Leading to cases where a link could be down, but qedr would see it as up. In addition, once qede was loaded, link state would be up, regardless of the actual link state. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 2db70eabddfe..5c28209e97d0 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2067,8 +2067,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, link_params.link_up = true; edev->ops->common->set_link(edev->cdev, &link_params); - qede_rdma_dev_event_open(edev); - edev->state = QEDE_STATE_OPEN; DP_INFO(edev, "Ending successfully qede load\n"); @@ -2169,12 +2167,14 @@ static void qede_link_update(void *dev, struct qed_link_output *link) DP_NOTICE(edev, "Link is up\n"); netif_tx_start_all_queues(edev->ndev); netif_carrier_on(edev->ndev); + qede_rdma_dev_event_open(edev); } } else { if (netif_carrier_ok(edev->ndev)) { DP_NOTICE(edev, "Link is down\n"); netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev); + qede_rdma_dev_event_close(edev); } } } -- cgit v1.2.3 From 02a2385f37a7c6594c9d89b64c4a1451276f08eb Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Mar 2018 21:10:23 +0100 Subject: netlink: avoid a double skb free in genlmsg_mcast() nlmsg_multicast() consumes always the skb, thus the original skb must be freed only when this function is called with a clone. Fixes: cb9f7a9a5c96 ("netlink: ensure to loop over all netns in genlmsg_multicast_allns()") Reported-by: Ben Hutchings Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6f02499ef007..b9ce82c9440f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1106,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, if (!err) delivered = true; else if (err != -ESRCH) - goto error; + return err; return delivered ? 0 : -ESRCH; error: kfree_skb(skb); -- cgit v1.2.3 From 6e5d58fdc9bedd0255a8781b258f10bbdc63e975 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 14 Mar 2018 13:32:09 -0700 Subject: skbuff: Fix not waking applications when errors are enqueued When errors are enqueued to the error queue via sock_queue_err_skb() function, it is possible that the waiting application is not notified. Calling 'sk->sk_data_ready()' would not notify applications that selected only POLLERR events in poll() (for example). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Randy E. Witt Reviewed-by: Eric Dumazet Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b103f46ec512..1e7acdc30732 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4179,7 +4179,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + sk->sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); -- cgit v1.2.3 From 7b7ef57c0dd2bb367ad41a77e5136c5579ae4026 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 16 Mar 2018 18:03:46 +0100 Subject: net: dsa: mv88e6xxx: Fix binding documentation for MDIO busses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MDIO busses are switch properties and so should be inside the switch node. Fix the examples in the binding document. Reported-by: 尤晓杰 Signed-off-by: Andrew Lunn Fixes: a3c53be55c95 ("net: dsa: mv88e6xxx: Support multiple MDIO busses") Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/marvell.txt | 48 +++++++++++----------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 1d4d0f49c9d0..8c033d48e2ba 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -50,14 +50,15 @@ Example: compatible = "marvell,mv88e6085"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - }; - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; }; }; }; @@ -74,23 +75,24 @@ Example: compatible = "marvell,mv88e6390"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - }; - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; }; - }; - mdio1 { - compatible = "marvell,mv88e6xxx-mdio-external"; - #address-cells = <1>; - #size-cells = <0>; - switch1phy9: switch1phy0@9 { - reg = <9>; + mdio1 { + compatible = "marvell,mv88e6xxx-mdio-external"; + #address-cells = <1>; + #size-cells = <0>; + switch1phy9: switch1phy0@9 { + reg = <9>; + }; }; }; }; -- cgit v1.2.3 From bc44b78157f621ff2a2618fe287a827bcb094ac4 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Mar 2018 11:29:09 +0100 Subject: batman-adv: update data pointers after skb_cow() batadv_check_unicast_ttvn() calls skb_cow(), so pointers into the SKB data must be (re)set after calling it. The ethhdr variable is dropped altogether. Fixes: 7cdcf6dddc42 ("batman-adv: add UNICAST_4ADDR packet type") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b6891e8b741c..6a9242658c8d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -968,14 +968,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -995,12 +991,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. */ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -1015,6 +1013,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); -- cgit v1.2.3 From 6f27d2c2a8c236d296201c19abb8533ec20d212b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Mar 2018 11:29:10 +0100 Subject: batman-adv: fix header size check in batadv_dbg_arp() Checking for 0 is insufficient: when an SKB without a batadv header, but with a VLAN header is received, hdr_size will be 4, making the following code interpret the Ethernet header as a batadv header. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 9703c791ffc5..87cd962d28d5 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -393,7 +393,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, batadv_arp_hw_src(skb, hdr_size), &ip_src, batadv_arp_hw_dst(skb, hdr_size), &ip_dst); - if (hdr_size == 0) + if (hdr_size < sizeof(struct batadv_unicast_packet)) return; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; -- cgit v1.2.3 From 2613f36ed965d0e5a595a1d931fd3b480e82d6fd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Mar 2018 19:36:14 +0100 Subject: x86/microcode: Attempt late loading only when new microcode is present Return UCODE_NEW from the scanning functions to denote that new microcode was found and only then attempt the expensive synchronization dance. Reported-by: Emanuel Czirai Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Emanuel Czirai Tested-by: Ashok Raj Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20180314183615.17629-1-bp@alien8.de --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/cpu/microcode/core.c | 8 +++----- arch/x86/kernel/cpu/microcode/intel.c | 4 +++- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047d61c7..6cf0e4cb7b97 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -39,6 +39,7 @@ struct device; enum ucode_state { UCODE_OK = 0, + UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a998e1a7d46f..48179928ff38 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return -EINVAL; ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret != UCODE_OK) + if (ret > UCODE_UPDATED) return -EINVAL; return 0; @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, static enum ucode_state load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { + struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) + if (ret != UCODE_OK) { cleanup(); + return ret; + } -#ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ - if (save) { - struct ucode_patch *p = find_patch(0); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } + p = find_patch(0); + if (!p) { + return ret; + } else { + if (boot_cpu_data.microcode == p->patch_id) + return ret; + + ret = UCODE_NEW; } -#endif + + /* save BSP's matching patch for early load */ + if (!save) + return ret; + + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 70ecbc8099c9..9f0fe5bb450d 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -607,7 +607,7 @@ static ssize_t reload_store(struct device *dev, return size; tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_OK) + if (tmp_ret != UCODE_NEW) return size; get_online_cpus(); @@ -691,10 +691,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, refresh_fw); + if (ustate == UCODE_NEW) { pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 2aded9db1d42..32b8e5724f96 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -862,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; + enum ucode_state ret = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -903,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, new_mc = mc; new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ + ret = UCODE_NEW; } ucode_ptr += mc_size; @@ -932,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - return UCODE_OK; + return ret; } static int get_ucode_fw(void *to, const void *from, size_t n) -- cgit v1.2.3 From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Mar 2018 19:36:15 +0100 Subject: x86/microcode: Fix CPU synchronization routine Emanuel reported an issue with a hang during microcode update because my dumb idea to use one atomic synchronization variable for both rendezvous - before and after update - was simply bollocks: microcode: microcode_reload_late: late_cpus: 4 microcode: __reload_late: cpu 2 entered microcode: __reload_late: cpu 1 entered microcode: __reload_late: cpu 3 entered microcode: __reload_late: cpu 0 entered microcode: __reload_late: cpu 1 left microcode: Timeout while waiting for CPUs rendezvous, remaining: 1 CPU1 above would finish, leave and the others will still spin waiting for it to join. So do two synchronization atomics instead, which makes the code a lot more straightforward. Also, since the update is serialized and it also takes quite some time per microcode engine, increase the exit timeout by the number of CPUs on the system. That's ok because the moment all CPUs are done, that timeout will be cut short. Furthermore, panic when some of the CPUs timeout when returning from a microcode update: we can't allow a system with not all cores updated. Also, as an optimization, do not do the exit sync if microcode wasn't updated. Reported-by: Emanuel Czirai Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Emanuel Czirai Tested-by: Ashok Raj Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9f0fe5bb450d..10c4fc2c91f8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -517,7 +517,29 @@ static int check_online_cpus(void) return -EINVAL; } -static atomic_t late_cpus; +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ + int all_cpus = num_online_cpus(); + + atomic_inc(t); + + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + all_cpus - atomic_read(t)); + return 1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + return 0; +} /* * Returns: @@ -527,30 +549,16 @@ static atomic_t late_cpus; */ static int __reload_late(void *info) { - unsigned int timeout = NSEC_PER_SEC; - int all_cpus = num_online_cpus(); int cpu = smp_processor_id(); enum ucode_state err; int ret = 0; - atomic_dec(&late_cpus); - /* * Wait for all CPUs to arrive. A load will not be attempted unless all * CPUs show up. * */ - while (atomic_read(&late_cpus)) { - if (timeout < SPINUNIT) { - pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", - atomic_read(&late_cpus)); - return -1; - } - - ndelay(SPINUNIT); - timeout -= SPINUNIT; - - touch_nmi_watchdog(); - } + if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) + return -1; spin_lock(&update_lock); apply_microcode_local(&err); @@ -558,15 +566,22 @@ static int __reload_late(void *info) if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - ret = -1; - } else if (err == UCODE_UPDATED) { + return -1; + /* siblings return UCODE_OK because their engine got updated already */ + } else if (err == UCODE_UPDATED || err == UCODE_OK) { ret = 1; + } else { + return ret; } - atomic_inc(&late_cpus); - - while (atomic_read(&late_cpus) != all_cpus) - cpu_relax(); + /* + * Increase the wait timeout to a safe value here since we're + * serializing the microcode update and that could take a while on a + * large number of CPUs. And that is fine as the *actual* timeout will + * be determined by the last CPU finished updating and thus cut short. + */ + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) + panic("Timeout during microcode update!\n"); return ret; } @@ -579,12 +594,11 @@ static int microcode_reload_late(void) { int ret; - atomic_set(&late_cpus, num_online_cpus()); + atomic_set(&late_cpus_in, 0); + atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret < 0) - return ret; - else if (ret > 0) + if (ret > 0) microcode_check(); return ret; -- cgit v1.2.3 From 9ef0f88fe5466c2ca1d2975549ba6be502c464c1 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 7 Mar 2018 08:18:05 -0500 Subject: parisc: Handle case where flush_cache_range is called with no context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just when I had decided that flush_cache_range() was always called with a valid context, Helge reported two cases where the "BUG_ON(!vma->vm_mm->context);" was hit on the phantom buildd: kernel BUG at /mnt/sdb6/linux/linux-4.15.4/arch/parisc/kernel/cache.c:587! CPU: 1 PID: 3254 Comm: kworker/1:2 Tainted: G D 4.15.0-1-parisc64-smp #1 Debian 4.15.4-1+b1 Workqueue: events free_ioctx   IAOQ[0]: flush_cache_range+0x164/0x168   IAOQ[1]: flush_cache_page+0x0/0x1c8   RP(r2): unmap_page_range+0xae8/0xb88 Backtrace:   [<00000000404a6980>] unmap_page_range+0xae8/0xb88   [<00000000404a6ae0>] unmap_single_vma+0xc0/0x188   [<00000000404a6cdc>] zap_page_range_single+0x134/0x1f8   [<00000000404a702c>] unmap_mapping_range+0x1cc/0x208   [<0000000040461518>] truncate_pagecache+0x98/0x108   [<0000000040461624>] truncate_setsize+0x9c/0xb8   [<00000000405d7f30>] put_aio_ring_file+0x80/0x100   [<00000000405d803c>] aio_free_ring+0x8c/0x290   [<00000000405d82c0>] free_ioctx+0x80/0x180   [<0000000040284e6c>] process_one_work+0x21c/0x668   [<00000000402854c4>] worker_thread+0x20c/0x778   [<0000000040291d44>] kthread+0x2d4/0x2e0   [<0000000040204020>] end_fault_vector+0x20/0xc0 This indicates that we need to handle the no context case in flush_cache_range() as we do in flush_cache_mm(). In thinking about this, I realized that we don't need to flush the TLB when there is no context. So, I added context checks to the large flush cases in flush_cache_mm() and flush_cache_range(). The large flush case occurs frequently in flush_cache_mm() and the change should improve fork performance. The v2 version of this change removes the BUG_ON from flush_cache_page() by skipping the TLB flush when there is no context.  I also added code to flush the TLB in flush_cache_mm() and flush_cache_range() when we have a context that's not current.  Now all three routines handle TLB flushes in a similar manner. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 79089778725b..e3b45546d589 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -543,7 +543,8 @@ void flush_cache_mm(struct mm_struct *mm) rp3440, etc. So, avoid it if the mm isn't too big. */ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && mm_total_size(mm) >= parisc_cache_flush_threshold) { - flush_tlb_all(); + if (mm->context) + flush_tlb_all(); flush_cache_all(); return; } @@ -571,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm) pfn = pte_pfn(*ptep); if (!pfn_valid(pfn)) continue; + if (unlikely(mm->context)) + flush_tlb_page(vma, addr); __flush_cache_page(vma, addr, PFN_PHYS(pfn)); } } @@ -579,26 +582,46 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + pgd_t *pgd; + unsigned long addr; + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && end - start >= parisc_cache_flush_threshold) { - flush_tlb_range(vma, start, end); + if (vma->vm_mm->context) + flush_tlb_range(vma, start, end); flush_cache_all(); return; } - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); + if (vma->vm_mm->context == mfsp(3)) { + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); + flush_tlb_range(vma, start, end); + return; + } + + pgd = vma->vm_mm->pgd; + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + unsigned long pfn; + pte_t *ptep = get_ptep(pgd, addr); + if (!ptep) + continue; + pfn = pte_pfn(*ptep); + if (pfn_valid(pfn)) { + if (unlikely(vma->vm_mm->context)) + flush_tlb_page(vma, addr); + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } + } } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - BUG_ON(!vma->vm_mm->context); - if (pfn_valid(pfn)) { - flush_tlb_page(vma, vmaddr); + if (likely(vma->vm_mm->context)) + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } -- cgit v1.2.3 From cce6294cc2eaa083482e1d85d8db5845c82a7e14 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 18:53:00 -0700 Subject: net: sched: fix uses after free syzbot reported one use-after-free in pfifo_fast_enqueue() [1] Issue here is that we can not reuse skb after a successful skb_array_produce() since another cpu might have consumed it already. I believe a similar problem exists in try_bulk_dequeue_skb_slow() in case we put an skb into qdisc_enqueue_skb_bad_txq() for lockless qdisc. [1] BUG: KASAN: use-after-free in qdisc_pkt_len include/net/sch_generic.h:610 [inline] BUG: KASAN: use-after-free in qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline] BUG: KASAN: use-after-free in pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639 Read of size 4 at addr ffff8801cede37e8 by task syzkaller717588/5543 CPU: 1 PID: 5543 Comm: syzkaller717588 Not tainted 4.16.0-rc4+ #265 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23c/0x360 mm/kasan/report.c:412 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432 qdisc_pkt_len include/net/sch_generic.h:610 [inline] qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline] pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639 __dev_xmit_skb net/core/dev.c:3216 [inline] Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Eric Dumazet Reported-by: syzbot+ed43b6903ab968b16f54@syzkaller.appspotmail.com Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 190570f21b20..7e3fbe9cc936 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, __skb_queue_tail(&q->skb_bad_txq, skb); + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; + } + if (lock) spin_unlock(lock); } @@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, break; if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { qdisc_enqueue_skb_bad_txq(q, nskb); - - if (qdisc_is_percpu_stats(q)) { - qdisc_qstats_cpu_backlog_inc(q, nskb); - qdisc_qstats_cpu_qlen_inc(q); - } else { - qdisc_qstats_backlog_inc(q, nskb); - q->q.qlen++; - } break; } skb->next = nskb; @@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); + unsigned int pkt_len = qdisc_pkt_len(skb); int err; err = skb_array_produce(q, skb); @@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, return qdisc_drop_cpu(skb, qdisc, to_free); qdisc_qstats_cpu_qlen_inc(qdisc); - qdisc_qstats_cpu_backlog_inc(qdisc, skb); + /* Note: skb can not be used after skb_array_produce(), + * so we better not use qdisc_qstats_cpu_backlog_inc() + */ + this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len); return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From bcdd5de80a2275f7879dc278bfc747f1caf94442 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 Mar 2018 14:49:56 +0200 Subject: mlxsw: spectrum_buffers: Set a minimum quota for CPU port traffic In commit 9ffcc3725f09 ("mlxsw: spectrum: Allow packets to be trapped from any PG") I fixed a problem where packets could not be trapped to the CPU due to exceeded shared buffer quotas. The mentioned commit explains the problem in detail. The problem was fixed by assigning a minimum quota for the CPU port and the traffic class used for scheduling traffic to the CPU. However, commit 117b0dad2d54 ("mlxsw: Create a different trap group list for each device") assigned different traffic classes to different packet types and rendered the fix useless. Fix the problem by assigning a minimum quota for the CPU port and all the traffic classes that are currently in use. Fixes: 117b0dad2d54 ("mlxsw: Create a different trap group list for each device") Signed-off-by: Ido Schimmel Reported-by: Eddie Shklaer Tested-by: Eddie Shklaer Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 93728c694e6d..0a9adc5962fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -385,13 +385,13 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(10000, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, -- cgit v1.2.3 From e40bdb03d3cd7da66bd0bc1e40cbcfb49351265c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 17 Mar 2018 22:40:18 +0100 Subject: ALSA: hda/realtek - Always immediately update mute LED with pin VREF Some HP laptops have a mute mute LED controlled by a pin VREF. The Realtek codec driver updates the VREF via vmaster hook by calling snd_hda_set_pin_ctl_cache(). This works fine as long as the driver is running in a normal mode. However, when the VREF change happens during the codec being in runtime PM suspend, the regmap access will skip and postpone the actual register change. This ends up with the unchanged LED status until the next runtime PM resume even if you change the Master mute switch. (Interestingly, the machine keeps the LED status even after the codec goes into D3 -- but it's another story.) For improving this usability, let the driver temporarily powering up / down only during the pin VREF change. This can be achieved easily by wrapping the call with snd_hda_power_up_pm() / *_down_pm(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199073 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9af301c6bba2..5ef056f2aecc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3596,8 +3596,12 @@ static void alc269_fixup_mic_mute_hook(void *private_data, int enabled) pinval = snd_hda_codec_get_pin_target(codec, spec->mute_led_nid); pinval &= ~AC_PINCTL_VREFEN; pinval |= enabled ? AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_80; - if (spec->mute_led_nid) + if (spec->mute_led_nid) { + /* temporarily power up/down for setting VREF */ + snd_hda_power_up_pm(codec); snd_hda_set_pin_ctl_cache(codec, spec->mute_led_nid, pinval); + snd_hda_power_down_pm(codec); + } } /* Make sure the led works even in runtime suspend */ -- cgit v1.2.3 From d61d263c8d82db7c4404a29ebc29674b1c0c05c9 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 15 Mar 2018 17:54:20 +0100 Subject: net: hns: Fix ethtool private flags The driver implementation returns support for private flags, while no private flags are present. When asked for the number of private flags it returns the number of statistic flag names. Fix this by returning EOPNOTSUPP for not implemented ethtool flags. Signed-off-by: Matthias Brugger Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 86944bc3b273..74bd260ca02a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -666,7 +666,7 @@ static void hns_gmac_get_strings(u32 stringset, u8 *data) static int hns_gmac_get_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return ARRAY_SIZE(g_gmac_stats_string); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index b62816c1574e..93e71e27401b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -422,7 +422,7 @@ void hns_ppe_update_stats(struct hns_ppe_cb *ppe_cb) int hns_ppe_get_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return ETH_PPE_STATIC_NUM; return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 6f3570cfb501..e2e28532e4dc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -876,7 +876,7 @@ void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data) */ int hns_rcb_get_ring_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return HNS_RING_STATIC_REG_NUM; return 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 7ea7f8a4aa2a..2e14a3ae1d8b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -993,8 +993,10 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) cnt--; return cnt; - } else { + } else if (stringset == ETH_SS_STATS) { return (HNS_NET_STATS_CNT + ops->get_sset_count(h, stringset)); + } else { + return -EOPNOTSUPP; } } -- cgit v1.2.3 From f9db50691db4a7d860fce985f080bb3fc23a7ede Mon Sep 17 00:00:00 2001 From: "SZ Lin (林上智)" Date: Fri, 16 Mar 2018 00:56:01 +0800 Subject: net: ethernet: ti: cpsw: add check for in-band mode setting with RGMII PHY interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to AM335x TRM[1] 14.3.6.2, AM437x TRM[2] 15.3.6.2 and DRA7 TRM[3] 24.11.4.8.7.3.3, in-band mode in EXT_EN(bit18) register is only available when PHY is configured in RGMII mode with 10Mbps speed. It will cause some networking issues without RGMII mode, such as carrier sense errors and low throughput. TI also mentioned this issue in their forum[4]. This patch adds the check mechanism for PHY interface with RGMII interface type, the in-band mode can only be set in RGMII mode with 10Mbps speed. References: [1]: https://www.ti.com/lit/ug/spruh73p/spruh73p.pdf [2]: http://www.ti.com/lit/ug/spruhl7h/spruhl7h.pdf [3]: http://www.ti.com/lit/ug/spruic2b/spruic2b.pdf [4]: https://e2e.ti.com/support/arm/sitara_arm/f/791/p/640765/2392155 Suggested-by: Holsety Chen (陳憲輝) Signed-off-by: SZ Lin (林上智) Signed-off-by: Schuyler Patton Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1b1b78fdc138..b2b30c9df037 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1014,7 +1014,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, /* set speed_in input in case RMII mode is used in 100Mbps */ if (phy->speed == 100) mac_control |= BIT(15); - else if (phy->speed == 10) + /* in band mode only works in 10Mbps RGMII mode */ + else if ((phy->speed == 10) && phy_interface_is_rgmii(phy)) mac_control |= BIT(18); /* In Band mode */ if (priv->rx_pause) -- cgit v1.2.3 From 1edf8abe04090c4f41a85e42c66638be1ee69156 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:53 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_vlan_init() when the following command # tc actions replace action vlan pop index 100 is run for the first time, and tcf_vlan_init() fails allocating struct tcf_vlan_params, tcf_vlan_cleanup() calls kfree_rcu(NULL, ...). This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: __call_rcu+0x23/0x2b0 PGD 80000000760a2067 P4D 80000000760a2067 PUD 742c1067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_vlan(E) ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel mbcache snd_hda_codec jbd2 snd_hda_core crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev soundcore virtio_balloon pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_blk virtio_net ata_piix crc32c_intel libata virtio_pci i2c_core virtio_ring serio_raw virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan] CPU: 3 PID: 3119 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffaac3005fb798 EFLAGS: 00010246 RAX: ffffffffc0704080 RBX: ffff97f2b4bbe900 RCX: 00000000ffffffff RDX: ffffffffabca5f00 RSI: 0000000000000010 RDI: 0000000000000010 RBP: 0000000000000010 R08: 0000000000000001 R09: 0000000000000044 R10: 00000000fd003000 R11: ffff97f2faab5b91 R12: 0000000000000000 R13: ffffffffabca5f00 R14: ffff97f2fb80202c R15: 00000000fffffff4 FS: 00007f68f75b4740(0000) GS:ffff97f2ffd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000072b52001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_vlan_init+0x168/0x270 [act_vlan] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f68f69c5ba0 RSP: 002b:00007fffd79c1118 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fffd79c1240 RCX: 00007f68f69c5ba0 RDX: 0000000000000000 RSI: 00007fffd79c1190 RDI: 0000000000000003 RBP: 000000005aaa708e R08: 0000000000000002 R09: 0000000000000000 R10: 00007fffd79c0ba0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffd79c1254 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffaac3005fb798 CR2: 0000000000000018 fix this in tcf_vlan_cleanup(), ensuring that kfree_rcu(p, ...) is called only when p is not NULL. Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update") Acked-by: Jiri Pirko Acked-by: Manish Kurup Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index e1a1b3f3983a..c2914e9a4a6f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a) struct tcf_vlan_params *p; p = rcu_dereference_protected(v->vlan_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, -- cgit v1.2.3 From aab378a7eda21941c4b678b21aea8867b6817f59 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:54 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_csum_init() when the following command # tc action add action csum udp continue index 100 is run for the first time, and tcf_csum_init() fails allocating struct tcf_csum, tcf_csum_cleanup() calls kfree_rcu(NULL,...). This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: __call_rcu+0x23/0x2b0 PGD 80000000740b4067 P4D 80000000740b4067 PUD 32e7f067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_csum(E) act_vlan ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic pcbc snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer aesni_intel crypto_simd glue_helper cryptd snd joydev pcspkr virtio_balloon i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_blk drm virtio_net virtio_console ata_piix crc32c_intel libata virtio_pci serio_raw i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan] CPU: 2 PID: 5763 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffb275803e77c0 EFLAGS: 00010246 RAX: ffffffffc057b080 RBX: ffff9674bc6f5240 RCX: 00000000ffffffff RDX: ffffffff928a5f00 RSI: 0000000000000008 RDI: 0000000000000008 RBP: 0000000000000008 R08: 0000000000000001 R09: 0000000000000044 R10: 0000000000000220 R11: ffff9674b9ab4821 R12: 0000000000000000 R13: ffffffff928a5f00 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fa6368d8740(0000) GS:ffff9674bfd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000073dec001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_csum_init+0xfb/0x180 [act_csum] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fa635ce9ba0 RSP: 002b:00007ffc185b0fc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffc185b10f0 RCX: 00007fa635ce9ba0 RDX: 0000000000000000 RSI: 00007ffc185b1040 RDI: 0000000000000003 RBP: 000000005aaa85e0 R08: 0000000000000002 R09: 0000000000000000 R10: 00007ffc185b0a20 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc185b1104 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffb275803e77c0 CR2: 0000000000000010 fix this in tcf_csum_cleanup(), ensuring that kfree_rcu(param, ...) is called only when param is not NULL. Fixes: 9c5f69bbd75a ("net/sched: act_csum: don't use spinlock in the fast path") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_csum.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 24b2e8e681cf..2a5c8fd860cf 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,8 @@ static void tcf_csum_cleanup(struct tc_action *a) struct tcf_csum_params *params; params = rcu_dereference_protected(p->params, 1); - kfree_rcu(params, rcu); + if (params) + kfree_rcu(params, rcu); } static int tcf_csum_walker(struct net *net, struct sk_buff *skb, -- cgit v1.2.3 From abdadd3cfd3e7ea3da61ac774f84777d1f702058 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:55 +0100 Subject: net/sched: fix NULL dereference in the error path of tunnel_key_init() when the following command # tc action add action tunnel_key unset index 100 is run for the first time, and tunnel_key_init() fails to allocate struct tcf_tunnel_key_params, tunnel_key_release() dereferences NULL pointers. This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: tunnel_key_release+0xd/0x40 [act_tunnel_key] PGD 8000000033787067 P4D 8000000033787067 PUD 74646067 PMD 0 Oops: 0000 [#1] SMP PTI Modules linked in: act_tunnel_key(E) act_csum ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel pcbc snd_hda_codec snd_hda_core snd_hwdep snd_seq aesni_intel snd_seq_device crypto_simd glue_helper snd_pcm cryptd joydev snd_timer pcspkr virtio_balloon snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net virtio_blk drm virtio_console crc32c_intel ata_piix serio_raw i2c_core virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 2 PID: 3101 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tunnel_key_release+0xd/0x40 [act_tunnel_key] RSP: 0018:ffffba46803b7768 EFLAGS: 00010286 RAX: ffffffffc09010a0 RBX: 0000000000000000 RCX: 0000000000000024 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff99ee336d7480 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044 R10: 0000000000000220 R11: ffff99ee79d73131 R12: 0000000000000000 R13: ffff99ee32d67610 R14: ffff99ee7671dc38 R15: 00000000fffffff4 FS: 00007febcb2cd740(0000) GS:ffff99ee7fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000007c8e4005 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tunnel_key_init+0xd9/0x460 [act_tunnel_key] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7febca6deba0 RSP: 002b:00007ffe7b0dd128 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffe7b0dd250 RCX: 00007febca6deba0 RDX: 0000000000000000 RSI: 00007ffe7b0dd1a0 RDI: 0000000000000003 RBP: 000000005aaa90cb R08: 0000000000000002 R09: 0000000000000000 R10: 00007ffe7b0dcba0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe7b0dd264 R14: 0000000000000001 R15: 0000000000669f60 Code: 44 00 00 8b 0d b5 23 00 00 48 8b 87 48 10 00 00 48 8b 3c c8 e9 a5 e5 d8 c3 0f 1f 44 00 00 0f 1f 44 00 00 53 48 8b 9f b0 00 00 00 <83> 7b 10 01 74 0b 48 89 df 31 f6 5b e9 f2 fa 7f c3 48 8b 7b 18 RIP: tunnel_key_release+0xd/0x40 [act_tunnel_key] RSP: ffffba46803b7768 CR2: 0000000000000010 Fix this in tunnel_key_release(), ensuring 'param' is not NULL before dereferencing it. Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fea772e66e62..1281ca463727 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -208,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); + if (params) { + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); + kfree_rcu(params, rcu); + } } static int tunnel_key_dump_addresses(struct sk_buff *skb, -- cgit v1.2.3 From 1f110e7cae09e6c6a144616480d1a9dd99c5208a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:56 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_sample_init() when the following command # tc action add action sample rate 100 group 100 index 100 is run for the first time, and psample_group_get(100) fails to create a new group, tcf_sample_cleanup() calls psample_group_put(NULL), thus causing the following error: BUG: unable to handle kernel NULL pointer dereference at 000000000000001c IP: psample_group_put+0x15/0x71 [psample] PGD 8000000075775067 P4D 8000000075775067 PUD 7453c067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_sample(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core mbcache jbd2 crct10dif_pclmul snd_hwdep crc32_pclmul snd_seq ghash_clmulni_intel pcbc snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev pcspkr i2c_piix4 soundcore virtio_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_net ata_piix virtio_console virtio_blk libata serio_raw crc32c_intel virtio_pci i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_tunnel_key] CPU: 2 PID: 5740 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:psample_group_put+0x15/0x71 [psample] RSP: 0018:ffffb8a80032f7d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000024 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffffffc06d93c0 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044 R10: 00000000bd003000 R11: ffff979fba04aa59 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff979fbba3f22c FS: 00007f7638112740(0000) GS:ffff979fbfd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000001c CR3: 00000000734ea001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_sample_init+0x125/0x1d0 [act_sample] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f7637523ba0 RSP: 002b:00007fff0473ef58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff0473f080 RCX: 00007f7637523ba0 RDX: 0000000000000000 RSI: 00007fff0473efd0 RDI: 0000000000000003 RBP: 000000005aaaac80 R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff0473e9e0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff0473f094 R14: 0000000000000001 R15: 0000000000669f60 Code: be 02 00 00 00 48 89 df e8 a9 fe ff ff e9 7c ff ff ff 0f 1f 40 00 0f 1f 44 00 00 53 48 89 fb 48 c7 c7 c0 93 6d c0 e8 db 20 8c ef <83> 6b 1c 01 74 10 48 c7 c7 c0 93 6d c0 ff 14 25 e8 83 83 b0 5b RIP: psample_group_put+0x15/0x71 [psample] RSP: ffffb8a80032f7d0 CR2: 000000000000001c Fix it in tcf_sample_cleanup(), ensuring that calls to psample_group_put(p) are done only when p is not NULL. Fixes: cadb9c9fdbc6 ("net/sched: act_sample: Fix error path in init") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_sample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1ba0df238756..74c5d7e6a0fa 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a) psample_group = rtnl_dereference(s->psample_group); RCU_INIT_POINTER(s->psample_group, NULL); - psample_group_put(psample_group); + if (psample_group) + psample_group_put(psample_group); } static bool tcf_sample_dev_ok_push(struct net_device *dev) -- cgit v1.2.3 From 2d433610176d6569e8b3a28f67bc72235bf69efc Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:57 +0100 Subject: net/sched: fix NULL dereference on the error path of tcf_skbmod_init() when the following command # tc action replace action skbmod swap mac index 100 is run for the first time, and tcf_skbmod_init() fails to allocate struct tcf_skbmod_params, tcf_skbmod_cleanup() calls kfree_rcu(NULL), thus causing the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: __call_rcu+0x23/0x2b0 PGD 8000000034057067 P4D 8000000034057067 PUD 74937067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_skbmod(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec crct10dif_pclmul mbcache jbd2 crc32_pclmul snd_hda_core ghash_clmulni_intel snd_hwdep pcbc snd_seq snd_seq_device snd_pcm aesni_intel snd_timer crypto_simd glue_helper snd cryptd virtio_balloon joydev soundcore pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_net virtio_blk ata_piix libata crc32c_intel virtio_pci serio_raw virtio_ring virtio i2c_core floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_skbmod] CPU: 3 PID: 3144 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffbd2e403e7798 EFLAGS: 00010246 RAX: ffffffffc0872080 RBX: ffff981d34bff780 RCX: 00000000ffffffff RDX: ffffffff922a5f00 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000021f R10: 000000003d003000 R11: 0000000000aaaaaa R12: 0000000000000000 R13: ffffffff922a5f00 R14: 0000000000000001 R15: ffff981d3b698c2c FS: 00007f3678292740(0000) GS:ffff981d3fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000007c57a006 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_skbmod_init+0x1d1/0x210 [act_skbmod] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f36776a3ba0 RSP: 002b:00007fff4703b618 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff4703b740 RCX: 00007f36776a3ba0 RDX: 0000000000000000 RSI: 00007fff4703b690 RDI: 0000000000000003 RBP: 000000005aaaba36 R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff4703b0a0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff4703b754 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffbd2e403e7798 CR2: 0000000000000008 Fix it in tcf_skbmod_cleanup(), ensuring that kfree_rcu(p, ...) is called only when p is not NULL. Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index fa975262dbac..d09565d6433e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a) struct tcf_skbmod_params *p; p = rcu_dereference_protected(d->skbmod_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, -- cgit v1.2.3 From 7a4c003d6921e2af215f4790aa43a292bdc78be0 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Fri, 16 Mar 2018 14:47:54 -0700 Subject: vmxnet3: avoid xmit reset due to a race in vmxnet3 The field txNumDeferred is used by the driver to keep track of the number of packets it has pushed to the emulation. The driver increments it on pushing the packet to the emulation and the emulation resets it to 0 at the end of the transmit. There is a possibility of a race either when (a) ESX is under heavy load or (b) workload inside VM is of low packet rate. This race results in xmit hangs when network coalescing is disabled. This change creates a local copy of txNumDeferred and uses it to perform ring arithmetic. Reported-by: Noriho Tanaka Signed-off-by: Ronak Doshi Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 13 ++++++++----- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 8b39c160743d..b466a422b72d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -977,6 +977,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, { int ret; u32 count; + int num_pkts; + int tx_num_deferred; unsigned long flags; struct vmxnet3_tx_ctx ctx; union Vmxnet3_GenericDesc *gdesc; @@ -1075,12 +1077,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, #else gdesc = ctx.sop_txd; #endif + tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); if (ctx.mss) { gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; gdesc->txd.om = VMXNET3_OM_TSO; gdesc->txd.msscof = ctx.mss; - le32_add_cpu(&tq->shared->txNumDeferred, (skb->len - - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss); + num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss; } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { gdesc->txd.hlen = ctx.eth_ip_hdr_size; @@ -1091,8 +1093,10 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.om = 0; gdesc->txd.msscof = 0; } - le32_add_cpu(&tq->shared->txNumDeferred, 1); + num_pkts = 1; } + le32_add_cpu(&tq->shared->txNumDeferred, num_pkts); + tx_num_deferred += num_pkts; if (skb_vlan_tag_present(skb)) { gdesc->txd.ti = 1; @@ -1118,8 +1122,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, spin_unlock_irqrestore(&tq->tx_lock, flags); - if (le32_to_cpu(tq->shared->txNumDeferred) >= - le32_to_cpu(tq->shared->txThreshold)) { + if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD + tq->qid * 8, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5ba222920e80..b94fdfd0b6f1 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.11.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.12.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040b00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040c00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ -- cgit v1.2.3 From 034f405793897a3c8f642935f5494b86c340cde7 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Fri, 16 Mar 2018 14:49:19 -0700 Subject: vmxnet3: use correct flag to indicate LRO feature 'Commit 45dac1d6ea04 ("vmxnet3: Changes for vmxnet3 adapter version 2 (fwd)")' introduced a flag "lro" in structure vmxnet3_adapter which is used to indicate whether LRO is enabled or not. However, the patch did not set the flag and hence it was never exercised. So, when LRO is enabled, it resulted in poor TCP performance due to delayed acks. This issue is seen with packets which are larger than the mss getting a delayed ack rather than an immediate ack, thus resulting in high latency. This patch removes the lro flag and directly uses device features against NETIF_F_LRO to check if lro is enabled. Fixes: 45dac1d6ea04 ("vmxnet3: Changes for vmxnet3 adapter version 2 (fwd)") Reported-by: Rachel Lunnon Signed-off-by: Ronak Doshi Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 ++- drivers/net/vmxnet3/vmxnet3_int.h | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b466a422b72d..e04937f44f33 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1473,7 +1473,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || !adapter->lro) + if (!rcd->tcp || + !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; if (segCnt != 0 && mss != 0) { diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index b94fdfd0b6f1..99387a4a20a8 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.12.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.13.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040c00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040d00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ @@ -343,7 +343,6 @@ struct vmxnet3_adapter { u8 version; bool rxcsum; - bool lro; #ifdef VMXNET3_RSS struct UPT1_RSSConf *rss_conf; -- cgit v1.2.3 From 48881ed56b395bdf2cff6c102039016223ca4da6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 18 Mar 2018 09:48:03 +0100 Subject: batman-adv: Add missing include for EPOLL* constants Fixes: a9a08845e9ac ("vfs: do bulk POLL* -> EPOLL* replacement") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/icmp_socket.c | 1 + net/batman-adv/log.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index e91f29c7c638..5daa3d50da17 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index dc9fa37ddd14..cdbe0e5e208b 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From fc04fdb2c8a894283259f5621d31d75610701091 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 16 Mar 2018 21:14:32 +0100 Subject: batman-adv: Fix skbuff rcsum on packet reroute batadv_check_unicast_ttvn may redirect a packet to itself or another originator. This involves rewriting the ttvn and the destination address in the batadv unicast header. These field were not yet pulled (with skb rcsum update) and thus any change to them also requires a change in the receive checksum. Reported-by: Matthias Schiffer Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 6a9242658c8d..e61dc1293bb5 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -759,6 +759,7 @@ free_skb: /** * batadv_reroute_unicast_packet() - update the unicast header for re-routing * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast packet to process * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination * @vid: VLAN identifier @@ -770,7 +771,7 @@ free_skb: * Return: true if the packet header has been updated, false otherwise */ static bool -batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, +batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_unicast_packet *unicast_packet, u8 *dst_addr, unsigned short vid) { @@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, } /* update the packet header */ + skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ether_addr_copy(unicast_packet->dest, orig_addr); unicast_packet->ttvn = orig_ttvn; + skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ret = true; out: @@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * the packet to */ if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) { - if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, + if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet, ethhdr->h_dest, vid)) batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv, @@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * destination can possibly be updated and forwarded towards the new * target host */ - if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, + if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet, ethhdr->h_dest, vid)) { batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv, "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", @@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, if (!primary_if) return false; + /* update the packet header */ + skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); + unicast_packet->ttvn = curr_ttvn; + skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); batadv_hardif_put(primary_if); - unicast_packet->ttvn = curr_ttvn; - return true; } -- cgit v1.2.3 From a069215cf5985f3aa1bba550264907d6bd05c5f7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 18 Mar 2018 12:49:51 -0700 Subject: net: fec: Fix unbalanced PM runtime calls When unbinding/removing the driver, we will run into the following warnings: [ 259.655198] fec 400d1000.ethernet: 400d1000.ethernet supply phy not found, using dummy regulator [ 259.665065] fec 400d1000.ethernet: Unbalanced pm_runtime_enable! [ 259.672770] fec 400d1000.ethernet (unnamed net_device) (uninitialized): Invalid MAC address: 00:00:00:00:00:00 [ 259.683062] fec 400d1000.ethernet (unnamed net_device) (uninitialized): Using random MAC address: f2:3e:93:b7:29:c1 [ 259.696239] libphy: fec_enet_mii_bus: probed Avoid these warnings by balancing the runtime PM calls during fec_drv_remove(). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 7a7f3a42b2aa..d4604bc8eb5b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3600,6 +3600,8 @@ fec_drv_remove(struct platform_device *pdev) fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); -- cgit v1.2.3 From c15619a88ad43f215bbfb6ee163aa26ba9cc2573 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 9 Mar 2018 16:40:16 +0100 Subject: dt-bindings: exynos: Document #sound-dai-cells property of the HDMI node The #sound-dai-cells DT property is required to describe link between the HDMI IP block and the SoC's audio subsystem. Signed-off-by: Sylwester Nawrocki Reviewed-by: Rob Herring Signed-off-by: Inki Dae --- Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt b/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt index 6394ea9e3b9e..58b12e25bbb1 100644 --- a/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt +++ b/Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt @@ -16,6 +16,7 @@ Required properties: - ddc: phandle to the hdmi ddc node - phy: phandle to the hdmi phy node - samsung,syscon-phandle: phandle for system controller node for PMU. +- #sound-dai-cells: should be 0. Required properties for Exynos 4210, 4212, 5420 and 5433: - clocks: list of clock IDs from SoC clock driver. -- cgit v1.2.3 From c698ca5278934c0ae32297a8725ced2e27585d7f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Mar 2018 17:48:42 -0700 Subject: Linux 4.16-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e02d092bc2d6..d65e2e229017 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 4b0b37d4cc54b21a6ecad7271cbc850555869c62 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 17 Mar 2018 08:25:07 -0700 Subject: selftests/x86/ptrace_syscall: Fix for yet more glibc interference glibc keeps getting cleverer, and my version now turns raise() into more than one syscall. Since the test relies on ptrace seeing an exact set of syscalls, this breaks the test. Replace raise(SIGSTOP) with syscall(SYS_tgkill, ...) to force glibc to get out of our way. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/bc80338b453afa187bc5f895bd8e2c8d6e264da2.1521300271.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ptrace_syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 1ae1c5a7392e..6f22238f3217 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -183,8 +183,10 @@ static void test_ptrace_syscall_restart(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will make one syscall\n"); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_gettid, 10, 11, 12, 13, 14, 15); _exit(0); @@ -301,9 +303,11 @@ static void test_restart_under_ptrace(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will take a nap until signaled\n"); setsigign(SIGUSR1, SA_RESTART); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_pause, 0, 0, 0, 0, 0, 0); _exit(0); -- cgit v1.2.3 From b1d0b34b748c9e5e67a91c545cb48711cfde119c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Sat, 17 Mar 2018 01:52:38 +0100 Subject: drm/tegra: dc: Detach IOMMU group from domain only once Detaching from an IOMMU group multiple times can lead to a crash. This could potentially be fixed in the IOMMU driver, but it's easy to avoid the subsequent detach operations in this driver, so do that as well. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index b8403ed48285..fbffe1948b3b 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1903,8 +1903,12 @@ cleanup: if (!IS_ERR(primary)) drm_plane_cleanup(primary); - if (group && tegra->domain) { - iommu_detach_group(tegra->domain, group); + if (group && dc->domain) { + if (group == tegra->group) { + iommu_detach_group(dc->domain, group); + tegra->group = NULL; + } + dc->domain = NULL; } @@ -1913,8 +1917,10 @@ cleanup: static int tegra_dc_exit(struct host1x_client *client) { + struct drm_device *drm = dev_get_drvdata(client->parent); struct iommu_group *group = iommu_group_get(client->dev); struct tegra_dc *dc = host1x_client_to_dc(client); + struct tegra_drm *tegra = drm->dev_private; int err; devm_free_irq(dc->dev, dc->irq, dc); @@ -1926,7 +1932,11 @@ static int tegra_dc_exit(struct host1x_client *client) } if (group && dc->domain) { - iommu_detach_group(dc->domain, group); + if (group == tegra->group) { + iommu_detach_group(dc->domain, group); + tegra->group = NULL; + } + dc->domain = NULL; } -- cgit v1.2.3 From 8dafb8301cae1a3fc8407f5da62b491bfcfdf04b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Sat, 17 Mar 2018 02:48:34 +0100 Subject: drm/tegra: dsi: Don't disable regulator on ->exit() The regulator is controlled as part of runtime PM, so it should not be additionally disabled from the ->exit() callback. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 4d2ed966f9e3..87c5d89bc9ba 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1072,7 +1072,6 @@ static int tegra_dsi_exit(struct host1x_client *client) struct tegra_dsi *dsi = host1x_client_to_dsi(client); tegra_output_exit(&dsi->output); - regulator_disable(dsi->vdd); return 0; } -- cgit v1.2.3 From 192b4af6cd28cdad9b42fd79c21a90a2aeb0bec7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Sun, 18 Mar 2018 01:13:39 +0100 Subject: drm/tegra: Shutdown on driver unbind Since commit 846c7dfc1193 ("drm/atomic: Try to preserve the crtc enabled state in drm_atomic_remove_fb, v2."), removing the last framebuffer will no longer disable the corresponding pipeline, which causes the KMS core to complain about leaked connectors on driver unbind. Fix this by calling drm_atomic_helper_shutdown() on driver unbind, which will cause all display pipelines to be shut down and therefore drop the extra references on the connectors. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index d50bddb2e447..7fcf4a242840 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -250,6 +250,7 @@ static void tegra_drm_unload(struct drm_device *drm) drm_kms_helper_poll_fini(drm); tegra_drm_fb_exit(drm); + drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); err = host1x_device_exit(device); -- cgit v1.2.3 From 82bf43b291888599b4079244d12195d214086fa4 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 14 Mar 2018 16:42:17 +0800 Subject: Revert "ACPI / battery: Add quirk for Asus GL502VSK and UX305LA" Revert commit c68f0676ef7d ("ACPI / battery: Add quirk for Asus GL502VSK and UX305LA") and commit 4446823e2573 ("ACPI / battery: Add quirk for Asus UX360UA and UX410UAK"). On many many Asus products, the battery is sometimes reported as charging or discharging even when it is full and you are on AC power. This change quirked the kernel to avoid advertising the discharging state when this happens on 4 laptop models, under the belief that this was incorrect information. I presume it originates from user reports who are confused that their battery status icon says that it is discharging. However, the reported information is indeed correct, and the quirk approach taken is inadequate and more thought is needed first. Specifically: 1. It only quirks discharging state, not charging 2. There are so many different Asus products and DMI naming variants within those product families that behave this way; Linux could grow to quirk hundreds of products and still not even be close at "winning" this battle. 3. Asus previously clarified that this behaviour is intentional. The platform will periodically do a partial discharge/charge cycle when the battery is full, because this is one way to extend the lifetime of the battery (leaving a battery at 100% charge and unused will decrease its usable capacity over time). My understanding is that any decent consumer product will have this behaviour, but it appears that Asus is different in that they expose this info through ACPI. However, the behaviour seems correct. The ACPI spec does not suggest in that the platform should hide the truth. It lets you report that the battery is full of charge, and discharging, and with external power connected; and Asus does this. 4. In terms of not confusing the user, this seems like something that could/should be handled by userspace, which can also detect these same (accurate) conditions in the general case. Revert this quirk before it gets included in a release, while we look for better approaches. Signed-off-by: Daniel Drake Acked-by: Kai-Heng Feng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/battery.c | 48 +++--------------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 7128488a3a72..f2eb6c37ea0a 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -70,7 +70,6 @@ static async_cookie_t async_cookie; static bool battery_driver_registered; static int battery_bix_broken_package; static int battery_notification_delay_ms; -static int battery_full_discharging; static unsigned int cache_time = 1000; module_param(cache_time, uint, 0644); MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); @@ -215,12 +214,9 @@ static int acpi_battery_get_property(struct power_supply *psy, return -ENODEV; switch (psp) { case POWER_SUPPLY_PROP_STATUS: - if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) { - if (battery_full_discharging && battery->rate_now == 0) - val->intval = POWER_SUPPLY_STATUS_FULL; - else - val->intval = POWER_SUPPLY_STATUS_DISCHARGING; - } else if (battery->state & ACPI_BATTERY_STATE_CHARGING) + if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) + val->intval = POWER_SUPPLY_STATUS_DISCHARGING; + else if (battery->state & ACPI_BATTERY_STATE_CHARGING) val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (acpi_battery_is_charged(battery)) val->intval = POWER_SUPPLY_STATUS_FULL; @@ -1170,12 +1166,6 @@ battery_notification_delay_quirk(const struct dmi_system_id *d) return 0; } -static int __init battery_full_discharging_quirk(const struct dmi_system_id *d) -{ - battery_full_discharging = 1; - return 0; -} - static const struct dmi_system_id bat_dmi_table[] __initconst = { { .callback = battery_bix_broken_package_quirk, @@ -1193,38 +1183,6 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire V5-573G"), }, }, - { - .callback = battery_full_discharging_quirk, - .ident = "ASUS GL502VSK", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "GL502VSK"), - }, - }, - { - .callback = battery_full_discharging_quirk, - .ident = "ASUS UX305LA", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "UX305LA"), - }, - }, - { - .callback = battery_full_discharging_quirk, - .ident = "ASUS UX360UA", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "UX360UA"), - }, - }, - { - .callback = battery_full_discharging_quirk, - .ident = "ASUS UX410UAK", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "UX410UAK"), - }, - }, {}, }; -- cgit v1.2.3 From 6de564939e14327148e31ddcf769e34105176447 Mon Sep 17 00:00:00 2001 From: OuYang ZhiZhong Date: Sun, 11 Mar 2018 15:59:07 +0800 Subject: mtdchar: fix usage of mtd_ooblayout_ecc() Section was not properly computed. The value of OOB region definition is always ECC section 0 information in the OOB area, but we want to get all the ECC bytes information, so we should call mtd_ooblayout_ecc(mtd, section++, &oobregion) until it returns -ERANGE. Fixes: c2b78452a9db ("mtd: use mtd_ooblayout_xxx() helpers where appropriate") Cc: Signed-off-by: OuYang ZhiZhong Signed-off-by: Boris Brezillon --- drivers/mtd/mtdchar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index de8c902059b8..7d80a8bb96fe 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -479,7 +479,7 @@ static int shrink_ecclayout(struct mtd_info *mtd, for (i = 0; i < MTD_MAX_ECCPOS_ENTRIES;) { u32 eccpos; - ret = mtd_ooblayout_ecc(mtd, section, &oobregion); + ret = mtd_ooblayout_ecc(mtd, section++, &oobregion); if (ret < 0) { if (ret != -ERANGE) return ret; @@ -526,7 +526,7 @@ static int get_oobinfo(struct mtd_info *mtd, struct nand_oobinfo *to) for (i = 0; i < ARRAY_SIZE(to->eccpos);) { u32 eccpos; - ret = mtd_ooblayout_ecc(mtd, section, &oobregion); + ret = mtd_ooblayout_ecc(mtd, section++, &oobregion); if (ret < 0) { if (ret != -ERANGE) return ret; -- cgit v1.2.3 From 9ffd7503944ec7c0ef41c3245d1306c221aef2be Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Thu, 15 Mar 2018 18:23:17 +0000 Subject: can: cc770: Fix use after free in cc770_tx_interrupt() This fixes use after free introduced by the last cc770 patch. Signed-off-by: Andri Yngvason Fixes: 746201235b3f ("can: cc770: Fix queue stall & dropped RTR reply") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 2743d82d4424..6da69af103e6 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -706,13 +706,12 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) return; } - can_put_echo_skb(priv->tx_skb, dev, 0); - can_get_echo_skb(dev, 0); - cf = (struct can_frame *)priv->tx_skb->data; stats->tx_bytes += cf->can_dlc; stats->tx_packets++; + can_put_echo_skb(priv->tx_skb, dev, 0); + can_get_echo_skb(dev, 0); priv->tx_skb = NULL; netif_wake_queue(dev); -- cgit v1.2.3 From 4c41aa24baa4ed338241d05494f2c595c885af8f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Mar 2018 14:07:45 +0300 Subject: staging: ncpfs: memory corruption in ncp_read_kernel() If the server is malicious then *bytes_read could be larger than the size of the "target" buffer. It would lead to memory corruption when we do the memcpy(). Reported-by: Dr Silvio Cesare of InfoSect Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ncpfs/ncplib_kernel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/ncpfs/ncplib_kernel.c b/drivers/staging/ncpfs/ncplib_kernel.c index 804adfebba2f..3e047eb4cc7c 100644 --- a/drivers/staging/ncpfs/ncplib_kernel.c +++ b/drivers/staging/ncpfs/ncplib_kernel.c @@ -981,6 +981,10 @@ ncp_read_kernel(struct ncp_server *server, const char *file_id, goto out; } *bytes_read = ncp_reply_be16(server, 0); + if (*bytes_read > to_read) { + result = -EINVAL; + goto out; + } source = ncp_reply_data(server, 2 + (offset & 1)); memcpy(target, source, *bytes_read); -- cgit v1.2.3 From 62ac3f7305470e3f52f159de448bc1a771717e88 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:33:58 +0100 Subject: libata: Apply NOLPM quirk to Crucial M500 480 and 960GB SSDs There have been reports of the Crucial M500 480GB model not working with LPM set to min_power / med_power_with_dipm level. It has not been tested with medium_power, but that typically has no measurable power-savings. Note the reporters Crucial_CT480M500SSD3 has a firmware version of MU03 and there is a MU05 update available, but that update does not mention any LPM fixes in its changelog, so the quirk matches all firmware versions. In my experience the LPM problems with (older) Crucial SSDs seem to be limited to higher capacity versions of the SSDs (different firmware?), so this commit adds a NOLPM quirk for the 480 and 960GB versions of the M500, to avoid LPM causing issues with these SSDs. Cc: stable@vger.kernel.org Reported-and-tested-by: Martin Steigerwald Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index aec609f80c4e..53400ce09818 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4538,6 +4538,14 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */ + { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + { "Crucial_CT960M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, -- cgit v1.2.3 From 3bf7b5d6d017c27e0d3b160aafb35a8e7cfeda1f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:33:59 +0100 Subject: libata: Make Crucial BX100 500GB LPM quirk apply to all firmware versions Commit b17e5729a630 ("libata: disable LPM for Crucial BX100 SSD 500GB drive"), introduced a ATA_HORKAGE_NOLPM quirk for Crucial BX100 500GB SSDs but limited this to the MU02 firmware version, according to: http://www.crucial.com/usa/en/support-ssd-firmware MU02 is the last version, so there are no newer possibly fixed versions and if the MU02 version has broken LPM then the MU01 almost certainly also has broken LPM, so this commit changes the quirk to apply to all firmware versions. Fixes: b17e5729a630 ("libata: disable LPM for Crucial BX100 SSD 500GB...") Cc: stable@vger.kernel.org Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 53400ce09818..bce9840526da 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4531,7 +4531,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* Crucial BX100 SSD 500GB has broken LPM support */ - { "CT500BX100SSD1", "MU02", ATA_HORKAGE_NOLPM }, + { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | -- cgit v1.2.3 From d418ff56b8f2d2b296daafa8da151fe27689b757 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:34:00 +0100 Subject: libata: Modify quirks for MX100 to limit NCQ_TRIM quirk to MU01 version When commit 9c7be59fc519af ("libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs") was added it inherited the ATA_HORKAGE_NO_NCQ_TRIM quirk from the existing "Crucial_CT*MX100*" entry, but that entry sets model_rev to "MU01", where as the entry adding the NOLPM quirk sets it to NULL. This means that after this commit we no apply the NO_NCQ_TRIM quirk to all "Crucial_CT512MX100*" SSDs even if they have the fixed "MU02" firmware. This commit splits the "Crucial_CT512MX100*" quirk into 2 quirks, one for the "MU01" firmware and one for all other firmware versions, so that we once again only apply the NO_NCQ_TRIM quirk to the "MU01" firmware version. Fixes: 9c7be59fc519af ("libata: Apply NOLPM quirk to ... MX100 512GB SSDs") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index bce9840526da..7431ccd03316 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4533,10 +4533,13 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Crucial BX100 SSD 500GB has broken LPM support */ { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, - /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ - { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */ + { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* 512GB MX100 with newer firmware has only LPM issues */ + { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */ { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | -- cgit v1.2.3 From a6618f4aedb2b60932d766bd82ae7ce866e842aa Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Mon, 19 Mar 2018 07:11:08 +0100 Subject: ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit Currently, the offsets in the UAC2 processing unit descriptor are calculated incorrectly. It causes an issue when connecting the device which provides such a feature: ~~~~ [84126.724420] usb 1-1.3.1: invalid Processing Unit descriptor (id 18) ~~~~ After this patch is applied, the UAC2 processing unit inits w/o this error. Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0") Signed-off-by: Kirill Marinushkin Cc: Signed-off-by: Takashi Iwai --- include/uapi/linux/usb/audio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index 17a022c5b414..da3315ed1bcd 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -370,7 +370,7 @@ static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_d { return (protocol == UAC_VERSION_1) ? desc->baSourceID[desc->bNrInPins + 4] : - desc->baSourceID[desc->bNrInPins + 6]; + 2; /* in UAC2, this value is constant */ } static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc, @@ -378,7 +378,7 @@ static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_de { return (protocol == UAC_VERSION_1) ? &desc->baSourceID[desc->bNrInPins + 5] : - &desc->baSourceID[desc->bNrInPins + 7]; + &desc->baSourceID[desc->bNrInPins + 6]; } static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc, -- cgit v1.2.3 From 219be9dda6137bc9759c449bbff5d4394fe73382 Mon Sep 17 00:00:00 2001 From: Clark Zheng Date: Thu, 15 Mar 2018 14:02:06 +0800 Subject: drm/amd/display: Refine disable VGA bad case won't follow normal sense, it will not enable vga1 as usual, but vga2,3,4 is on. Signed-off-by: Clark Zheng Reviewed-by: Tony Cheng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h | 8 +++++++- .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 20 +++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index a993279a8f2d..f11f17fe08f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -496,6 +496,9 @@ struct dce_hwseq_registers { HWS_SF(, DOMAIN7_PG_STATUS, DOMAIN7_PGFSM_PWR_STATUS, mask_sh), \ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ HWS_SF(, D1VGA_CONTROL, D1VGA_MODE_ENABLE, mask_sh),\ + HWS_SF(, D2VGA_CONTROL, D2VGA_MODE_ENABLE, mask_sh),\ + HWS_SF(, D3VGA_CONTROL, D3VGA_MODE_ENABLE, mask_sh),\ + HWS_SF(, D4VGA_CONTROL, D4VGA_MODE_ENABLE, mask_sh),\ HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_ENABLE, mask_sh),\ HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh),\ HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \ @@ -591,7 +594,10 @@ struct dce_hwseq_registers { type DENTIST_DISPCLK_WDIVIDER; \ type VGA_TEST_ENABLE; \ type VGA_TEST_RENDER_START; \ - type D1VGA_MODE_ENABLE; + type D1VGA_MODE_ENABLE; \ + type D2VGA_MODE_ENABLE; \ + type D3VGA_MODE_ENABLE; \ + type D4VGA_MODE_ENABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 072e4485e85e..dc1e010725c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -238,14 +238,24 @@ static void enable_power_gating_plane( static void disable_vga( struct dce_hwseq *hws) { - unsigned int in_vga_mode = 0; - - REG_GET(D1VGA_CONTROL, D1VGA_MODE_ENABLE, &in_vga_mode); - - if (in_vga_mode == 0) + unsigned int in_vga1_mode = 0; + unsigned int in_vga2_mode = 0; + unsigned int in_vga3_mode = 0; + unsigned int in_vga4_mode = 0; + + REG_GET(D1VGA_CONTROL, D1VGA_MODE_ENABLE, &in_vga1_mode); + REG_GET(D2VGA_CONTROL, D2VGA_MODE_ENABLE, &in_vga2_mode); + REG_GET(D3VGA_CONTROL, D3VGA_MODE_ENABLE, &in_vga3_mode); + REG_GET(D4VGA_CONTROL, D4VGA_MODE_ENABLE, &in_vga4_mode); + + if (in_vga1_mode == 0 && in_vga2_mode == 0 && + in_vga3_mode == 0 && in_vga4_mode == 0) return; REG_WRITE(D1VGA_CONTROL, 0); + REG_WRITE(D2VGA_CONTROL, 0); + REG_WRITE(D3VGA_CONTROL, 0); + REG_WRITE(D4VGA_CONTROL, 0); /* HW Engineer's Notes: * During switch from vga->extended, if we set the VGA_TEST_ENABLE and -- cgit v1.2.3 From cd2d6c92a8e39d7e50a5af9fcc67d07e6a89e91d Mon Sep 17 00:00:00 2001 From: Shirish S Date: Thu, 15 Mar 2018 16:01:00 +0530 Subject: drm/amd/display: fix dereferencing possible ERR_PTR() This patch fixes static checker warning caused by "36cc549d5986: "drm/amd/display: disable CRTCs with NULL FB on their primary plane (V2)" Reported-by: Dan Carpenter Signed-off-by: Shirish S Reviewed-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c345e645f1d7..95b639eddcb6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4794,6 +4794,9 @@ static int dm_atomic_check_plane_state_fb(struct drm_atomic_state *state, return -EDEADLK; crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + if (crtc->primary == plane && crtc_state->active) { if (!plane_state->fb) return -EINVAL; -- cgit v1.2.3 From 49012d1bf5f78782d398adb984a080a88ba42965 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 8 Feb 2018 14:43:35 +0100 Subject: clk: bcm2835: Fix ana->maskX definitions ana->maskX values are already '~'-ed in bcm2835_pll_set_rate(). Remove the '~' in the definition to fix ANA setup. Note that this commit fixes a long standing bug preventing one from using an HDMI display if it's plugged after the FW has booted Linux. This is because PLLH is used by the HDMI encoder to generate the pixel clock. Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 44301a3d9963..2108a274185a 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -449,17 +449,17 @@ struct bcm2835_pll_ana_bits { static const struct bcm2835_pll_ana_bits bcm2835_ana_default = { .mask0 = 0, .set0 = 0, - .mask1 = (u32)~(A2W_PLL_KI_MASK | A2W_PLL_KP_MASK), + .mask1 = A2W_PLL_KI_MASK | A2W_PLL_KP_MASK, .set1 = (2 << A2W_PLL_KI_SHIFT) | (8 << A2W_PLL_KP_SHIFT), - .mask3 = (u32)~A2W_PLL_KA_MASK, + .mask3 = A2W_PLL_KA_MASK, .set3 = (2 << A2W_PLL_KA_SHIFT), .fb_prediv_mask = BIT(14), }; static const struct bcm2835_pll_ana_bits bcm2835_ana_pllh = { - .mask0 = (u32)~(A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK), + .mask0 = A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK, .set0 = (2 << A2W_PLLH_KA_SHIFT) | (2 << A2W_PLLH_KI_LOW_SHIFT), - .mask1 = (u32)~(A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK), + .mask1 = A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK, .set1 = (6 << A2W_PLLH_KP_SHIFT), .mask3 = 0, .set3 = 0, -- cgit v1.2.3 From 7997f3b2df751aab0b8e60149b226a32966c41ac Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 8 Feb 2018 14:43:36 +0100 Subject: clk: bcm2835: Protect sections updating shared registers CM_PLLx and A2W_XOSC_CTRL registers are accessed by different clock handlers and must be accessed with ->regs_lock held. Update the sections where this protection is missing. Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 2108a274185a..a07f6451694a 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -623,8 +623,10 @@ static int bcm2835_pll_on(struct clk_hw *hw) ~A2W_PLL_CTRL_PWRDN); /* Take the PLL out of reset. */ + spin_lock(&cprman->regs_lock); cprman_write(cprman, data->cm_ctrl_reg, cprman_read(cprman, data->cm_ctrl_reg) & ~CM_PLL_ANARST); + spin_unlock(&cprman->regs_lock); /* Wait for the PLL to lock. */ timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS); @@ -701,9 +703,11 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, } /* Unmask the reference clock from the oscillator. */ + spin_lock(&cprman->regs_lock); cprman_write(cprman, A2W_XOSC_CTRL, cprman_read(cprman, A2W_XOSC_CTRL) | data->reference_enable_mask); + spin_unlock(&cprman->regs_lock); if (do_ana_setup_first) bcm2835_pll_write_ana(cprman, data->ana_reg_base, ana); -- cgit v1.2.3 From 71546d100422bcc2c543dadeb9328728997cd23a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 08:27:26 -0700 Subject: percpu: include linux/sched.h for cond_resched() microblaze build broke due to missing declaration of the cond_resched() invocation added recently. Let's include linux/sched.h explicitly. Signed-off-by: Tejun Heo Reported-by: kbuild test robot --- mm/percpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/percpu.c b/mm/percpu.c index 36e7b65ba6cf..15a398c00791 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From f52ba1fef7b92e74d58efef8eae7b6f48c6d218d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Mar 2018 18:32:10 +0300 Subject: mm: Allow to kill tasks doing pcpu_alloc() and waiting for pcpu_balance_workfn() In case of memory deficit and low percpu memory pages, pcpu_balance_workfn() takes pcpu_alloc_mutex for a long time (as it makes memory allocations itself and waits for memory reclaim). If tasks doing pcpu_alloc() are choosen by OOM killer, they can't exit, because they are waiting for the mutex. The patch makes pcpu_alloc() to care about killing signal and use mutex_lock_killable(), when it's allowed by GFP flags. This guarantees, a task does not miss SIGKILL from OOM killer. Signed-off-by: Kirill Tkhai Signed-off-by: Tejun Heo --- mm/percpu.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 15a398c00791..9297098519a6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1373,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, return NULL; } - if (!is_atomic) - mutex_lock(&pcpu_alloc_mutex); + if (!is_atomic) { + /* + * pcpu_balance_workfn() allocates memory under this mutex, + * and it may wait for memory reclaim. Allow current task + * to become OOM victim, in case of memory pressure. + */ + if (gfp & __GFP_NOFAIL) + mutex_lock(&pcpu_alloc_mutex); + else if (mutex_lock_killable(&pcpu_alloc_mutex)) + return NULL; + } spin_lock_irqsave(&pcpu_lock, flags); -- cgit v1.2.3 From b3a5d111994450909158929560906f2c1c6c1d85 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:45:12 -0700 Subject: percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods percpu_ref internally uses sched-RCU to implement the percpu -> atomic mode switching and the documentation suggested that this could be depended upon. This doesn't seem like a good idea. * percpu_ref uses sched-RCU which has different grace periods regular RCU. Users may combine percpu_ref with regular RCU usage and incorrectly believe that regular RCU grace periods are performed by percpu_ref. This can lead to, for example, use-after-free due to premature freeing. * percpu_ref has a grace period when switching from percpu to atomic mode. It doesn't have one between the last put and release. This distinction is subtle and can lead to surprising bugs. * percpu_ref allows starting in and switching to atomic mode manually for debugging and other purposes. This means that there may not be any grace periods from kill to release. This patch makes it clear that the grace periods are percpu_ref's internal implementation detail and can't be depended upon by the users. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Linus Torvalds Signed-off-by: Tejun Heo --- include/linux/percpu-refcount.h | 18 ++++++++++++------ lib/percpu-refcount.c | 2 ++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 864d167a1073..009cdf3d65b6 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -30,10 +30,14 @@ * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it - * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove - * the kioctx from the proccess's list of kioctxs - after that, there can't be - * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop - * the initial ref with percpu_ref_put(). + * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref. + * After that, there can't be any new users of the kioctx (from lookup_ioctx()) + * and it's then safe to drop the initial ref with percpu_ref_put(). + * + * Note that the free path, free_ioctx(), needs to go through explicit call_rcu() + * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't + * imply RCU grace periods of any kind and if a user wants to combine percpu_ref + * with RCU protection, it must be done explicitly. * * Code that does a two stage shutdown like this often needs some kind of * explicit synchronization to ensure the initial refcount can only be dropped @@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref); * Must be used to drop the initial ref on a percpu refcount; must be called * precisely once before shutdown. * - * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the - * percpu counters and dropping the initial ref. + * Switches @ref into atomic mode before gathering up the percpu counters + * and dropping the initial ref. + * + * There are no implied RCU grace periods between kill and release. */ static inline void percpu_ref_kill(struct percpu_ref *ref) { diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 30e7dd88148b..9f96fa7bc000 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu); * This function normally doesn't block and can be called from any context * but it may block if @confirm_kill is specified and @ref is in the * process of switching to atomic mode by percpu_ref_switch_to_atomic(). + * + * There are no implied RCU grace periods between kill and release. */ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) -- cgit v1.2.3 From ed65a4dc22083e73bac599ded6a262318cad7baf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Mar 2018 14:20:15 +0200 Subject: RDMA/ucma: Fix use-after-free access in ucma_close The error in ucma_create_id() left ctx in the list of contexts belong to ucma file descriptor. The attempt to close this file descriptor causes to use-after-free accesses while iterating over such list. Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Reported-by: Signed-off-by: Leon Romanovsky Reviewed-by: Sean Hefty Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3f3f7bbc4528..14c260ed63cb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -497,6 +497,9 @@ err1: mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); + mutex_lock(&file->mut); + list_del(&ctx->list); + mutex_unlock(&file->mut); kfree(ctx); return ret; } -- cgit v1.2.3 From 80cf79ae4f688ead1dca8878990709c0446c3992 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Mar 2018 12:21:43 +0200 Subject: RDMA/verbs: Remove restrack entry from XRCD structure XRCD object is not implemented in the restrack, so lets remove it. Fixes: 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 73b2387e3f74..ff3ed435701f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1537,10 +1537,6 @@ struct ib_xrcd { struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; - /* - * Implementation details of the RDMA core, don't use in drivers: - */ - struct rdma_restrack_entry res; }; struct ib_ah { -- cgit v1.2.3 From 68ef3bc3166468678d5e1fdd216628c35bd1186f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Mar 2018 11:32:02 -0400 Subject: nfsd: remove blocked locks on client teardown We had some reports of panics in nfsd4_lm_notify, and that showed a nfs4_lockowner that had outlived its so_client. Ensure that we walk any leftover lockowners after tearing down all of the stateids, and remove any blocked locks that they hold. With this change, we also don't need to walk the nbl_lru on nfsd_net shutdown, as that will happen naturally when we tear down the clients. Fixes: 76d348fadff5 (nfsd: have nfsd4_lock use blocking locks for v4.1+ locks) Reported-by: Frank Sorenson Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.9 Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 62 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 150521c9671b..61b770e39809 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl) kfree(nbl); } +static void +remove_blocked_locks(struct nfs4_lockowner *lo) +{ + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd4_blocked_lock *nbl; + LIST_HEAD(reaplist); + + /* Dequeue all blocked locks */ + spin_lock(&nn->blocked_locks_lock); + while (!list_empty(&lo->lo_blocked)) { + nbl = list_first_entry(&lo->lo_blocked, + struct nfsd4_blocked_lock, + nbl_list); + list_del_init(&nbl->nbl_list); + list_move(&nbl->nbl_lru, &reaplist); + } + spin_unlock(&nn->blocked_locks_lock); + + /* Now free them */ + while (!list_empty(&reaplist)) { + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, + nbl_lru); + list_del_init(&nbl->nbl_lru); + posix_unblock_lock(&nbl->nbl_lock); + free_blocked_lock(nbl); + } +} + static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; @@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp) nfs4_get_stateowner(&oo->oo_owner); release_openowner(oo); } + for (i = 0; i < OWNER_HASH_SIZE; i++) { + struct nfs4_stateowner *so, *tmp; + + list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i], + so_strhash) { + /* Should be no openowners at this point */ + WARN_ON_ONCE(so->so_is_open_owner); + remove_blocked_locks(lockowner(so)); + } + } nfsd4_return_all_client_layouts(clp); nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) @@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); + remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); return status; @@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net) } } + WARN_ON(!list_empty(&nn->blocked_locks_lru)); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->unconf_id_hashtbl[i])) { clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); @@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net) struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd4_blocked_lock *nbl; cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net) nfs4_put_stid(&dp->dl_stid); } - BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->blocked_locks_lock); - while (!list_empty(&nn->blocked_locks_lru)) { - nbl = list_first_entry(&nn->blocked_locks_lru, - struct nfsd4_blocked_lock, nbl_lru); - list_move(&nbl->nbl_lru, &reaplist); - list_del_init(&nbl->nbl_list); - } - spin_unlock(&nn->blocked_locks_lock); - - while (!list_empty(&reaplist)) { - nbl = list_first_entry(&reaplist, - struct nfsd4_blocked_lock, nbl_lru); - list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); - free_blocked_lock(nbl); - } - nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); } -- cgit v1.2.3 From b1abf6fc49829d89660c961fafe3f90f3d843c55 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Mar 2018 14:51:49 +0100 Subject: ACPI / watchdog: Fix off-by-one error at resource assignment The resource allocation in WDAT watchdog has off-one-by error, it sets one byte more than the actual end address. This may eventually lead to unexpected resource conflicts. Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog) Cc: 4.9+ # 4.9+ Signed-off-by: Takashi Iwai Acked-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_watchdog.c | 4 ++-- drivers/watchdog/wdat_wdt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 11b113f8e367..ebb626ffb5fa 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -74,10 +74,10 @@ void __init acpi_watchdog_init(void) res.start = gas->address; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4); + res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width; + res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 6d1fbda0f461..0da9943d405f 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width; + r.end = r.start + gas->access_width - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { -- cgit v1.2.3 From b1e314462bba76660eec62760bb2e87f28f58866 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 13 Mar 2018 22:48:25 -0700 Subject: drm/i915/dp: Write to SET_POWER dpcd to enable MST hub. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If bios sets up an MST output and hardware state readout code sees this is an SST configuration, when disabling the encoder we end up calling ->post_disable_dp() hook instead of the MST version. Consequently, we write to the DP_SET_POWER dpcd to set it D3 state. Further along when we try enable the encoder in MST mode, POWER_UP_PHY transaction fails to power up the MST hub. This results in continuous link training failures which keep the system busy delaying boot. We could identify bios MST boot discrepancy and handle it accordingly but a simple way to solve this is to write to the DP_SET_POWER dpcd for MST too. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105470 Cc: Ville Syrjälä Cc: Jani Nikula Reviewed-by: Ville Syrjälä Reported-by: Laura Abbott Cc: stable@vger.kernel.org Fixes: 5ea2355a100a ("drm/i915/mst: Use MST sideband message transactions for dpms control") Tested-by: Laura Abbott Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180314054825.1718-1-dhinakaran.pandiyan@intel.com (cherry picked from commit ad260ab32a4d94fa974f58262f8000472d34fd5b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f51645a08dca..6aff9d096e13 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2175,8 +2175,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder); - if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2274,14 +2273,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_dp *intel_dp = &dig_port->dp; - bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. */ - if (!is_mst) - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_disable_ddi_buf(encoder); -- cgit v1.2.3 From 2399ac42e762ab25c58420e25359b2921afdc55f Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 16 Mar 2018 17:08:34 -0500 Subject: sysfs: symlink: export sysfs_create_link_nowarn() The sysfs_create_link_nowarn() is going to be used in phylib framework in subsequent patch which can be built as module. Hence, export sysfs_create_link_nowarn() to avoid build errors. Cc: Florian Fainelli Cc: Andrew Lunn Fixes: a3995460491d ("net: phy: Relax error checking on sysfs_create_link()") Signed-off-by: Grygorii Strashko Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- fs/sysfs/symlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 8664db25a9a6..215c225b2ca1 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, { return sysfs_do_create_link(kobj, target, name, 0); } +EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn); /** * sysfs_delete_link - remove symlink in object's directory. -- cgit v1.2.3 From 4414b3ed74be0e205e04e12cd83542a727d88255 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 16 Mar 2018 17:08:35 -0500 Subject: net: phy: relax error checking when creating sysfs link netdev->phydev Some ethernet drivers (like TI CPSW) may connect and manage >1 Net PHYs per one netdevice, as result such drivers will produce warning during system boot and fail to connect second phy to netdevice when PHYLIB framework will try to create sysfs link netdev->phydev for second PHY in phy_attach_direct(), because sysfs link with the same name has been created already for the first PHY. As result, second CPSW external port will became unusable. Fix it by relaxing error checking when PHYLIB framework is creating sysfs link netdev->phydev in phy_attach_direct(), suppressing warning by using sysfs_create_link_nowarn() and adding error message instead. After this change links (phy->netdev and netdev->phy) creation failure is not fatal any more and system can continue working, which fixes TI CPSW issue. Cc: Florian Fainelli Cc: Andrew Lunn Fixes: a3995460491d ("net: phy: Relax error checking on sysfs_create_link()") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 478405e544cc..fe16f5894092 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1012,10 +1012,17 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj, "attached_dev"); if (!err) { - err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj, - "phydev"); - if (err) - goto error; + err = sysfs_create_link_nowarn(&dev->dev.kobj, + &phydev->mdio.dev.kobj, + "phydev"); + if (err) { + dev_err(&dev->dev, "could not add device link to %s err %d\n", + kobject_name(&phydev->mdio.dev.kobj), + err); + /* non-fatal - some net drivers can use one netdevice + * with more then one phy + */ + } phydev->sysfs_links = true; } -- cgit v1.2.3 From 597d74005ba85e87c256cd732128ebf7faf54247 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Mar 2018 12:15:56 -0400 Subject: scsi: sd: Remember that READ CAPACITY(16) succeeded The USB storage glue sets the try_rc_10_first flag in an attempt to avoid wedging poorly implemented legacy USB devices. If the device capacity is too large to be expressed in the provided response buffer field of READ CAPACITY(10), a well-behaved device will set the reported capacity to 0xFFFFFFFF. We will then attempt to issue a READ CAPACITY(16) to obtain the real capacity. Since this part of the discovery logic is not covered by the first_scan flag, a warning will be printed a couple of times times per revalidate attempt if we upgrade from READ CAPACITY(10) to READ CAPACITY(16). Remember that we have successfully issued READ CAPACITY(16) so we can take the fast path on subsequent revalidate attempts. Reported-by: Menion Reviewed-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3b45f7fc5620..13ec1b5ef75c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2513,6 +2513,8 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) sector_size = old_sector_size; goto got_data; } + /* Remember that READ CAPACITY(16) succeeded */ + sdp->try_rc_10_first = 0; } } -- cgit v1.2.3 From 89d0c804392bb962553f23dc4c119d11b6bd1675 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Wed, 7 Mar 2018 20:29:03 +0800 Subject: scsi: iscsi_tcp: set BDI_CAP_STABLE_WRITES when data digest enabled iscsi tcp will first send out data, then calculate and send data digest. If we don't have BDI_CAP_STABLE_WRITES, the page cache will be written in spite of the on going writeback. Consequently, wrong digest will be got and sent to target. To fix this, set BDI_CAP_STABLE_WRITES when data digest is enabled in iscsi_tcp .slave_configure callback. Signed-off-by: Jianchao Wang Acked-by: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/iscsi_tcp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 6198559abbd8..dd66c11399a2 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -954,6 +955,13 @@ static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev) static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) { + struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host); + struct iscsi_session *session = tcp_sw_host->session; + struct iscsi_conn *conn = session->leadconn; + + if (conn->datadgst_en) + sdev->request_queue->backing_dev_info->capabilities + |= BDI_CAP_STABLE_WRITES; blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY); blk_queue_dma_alignment(sdev->request_queue, 0); return 0; -- cgit v1.2.3 From e3d03598e8ae7d195af5d3d049596dec336f569f Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 19 Mar 2018 13:57:46 -0700 Subject: x86/build/64: Force the linker to use 2MB page size Binutils 2.31 will enable -z separate-code by default for x86 to avoid mixing code pages with data to improve cache performance as well as security. To reduce x86-64 executable and shared object sizes, the maximum page size is reduced from 2MB to 4KB. But x86-64 kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to linker to force 2MB page size regardless of the default page size used by linker. Tested with Linux kernel 4.15.6 on x86-64. Signed-off-by: H.J. Lu Cc: Andy Shevchenko Cc: Eric Biederman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOp4_%3D_8twdpTyAP2DhONOCeaTOsniJLoppzhoNptL8xzA@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 498c1b812300..1c4d012550ec 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -223,6 +223,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) LDFLAGS := -m elf_$(UTS_MACHINE) +# +# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS += $(call ld-option, -z max-page-size=0x200000) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release -- cgit v1.2.3 From c55b8550fa57ba4f5e507be406ff9fc2845713e8 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 19 Mar 2018 14:08:11 -0700 Subject: x86/boot/64: Verify alignment of the LOAD segment Since the x86-64 kernel must be aligned to 2MB, refuse to boot the kernel if the alignment of the LOAD segment isn't a multiple of 2MB. Signed-off-by: H.J. Lu Cc: Andy Shevchenko Cc: Eric Biederman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOrR7xSJgUfiCoZLuqWUwymRxXPoGBW38%2BpN%3D9g%2ByKNhZw@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 98761a1576ce..252fee320816 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -309,6 +309,10 @@ static void parse_elf(void *output) switch (phdr->p_type) { case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif #ifdef CONFIG_RELOCATABLE dest = output; dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); -- cgit v1.2.3 From 45dbac0e288350f9a4226a5b4b651ed434dd9f85 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 15 Mar 2018 04:58:12 -0700 Subject: locking/mutex: Improve documentation On Wed, Mar 14, 2018 at 01:56:31PM -0700, Andrew Morton wrote: > My memory is weak and our documentation is awful. What does > mutex_lock_killable() actually do and how does it differ from > mutex_lock_interruptible()? Add kernel-doc for mutex_lock_killable() and mutex_lock_io(). Reword the kernel-doc for mutex_lock_interruptible(). Signed-off-by: Matthew Wilcox Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jonathan Corbet Cc: Kirill Tkhai Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cl@linux.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20180315115812.GA9949@bombadil.infradead.org Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 858a07590e39..2048359f33d2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1082,15 +1082,16 @@ static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock); /** - * mutex_lock_interruptible - acquire the mutex, interruptible - * @lock: the mutex to be acquired + * mutex_lock_interruptible() - Acquire the mutex, interruptible by signals. + * @lock: The mutex to be acquired. * - * Lock the mutex like mutex_lock(), and return 0 if the mutex has - * been acquired or sleep until the mutex becomes available. If a - * signal arrives while waiting for the lock then this function - * returns -EINTR. + * Lock the mutex like mutex_lock(). If a signal is delivered while the + * process is sleeping, this function will return without acquiring the + * mutex. * - * This function is similar to (but not equivalent to) down_interruptible(). + * Context: Process context. + * Return: 0 if the lock was successfully acquired or %-EINTR if a + * signal arrived. */ int __sched mutex_lock_interruptible(struct mutex *lock) { @@ -1104,6 +1105,18 @@ int __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); +/** + * mutex_lock_killable() - Acquire the mutex, interruptible by fatal signals. + * @lock: The mutex to be acquired. + * + * Lock the mutex like mutex_lock(). If a signal which will be fatal to + * the current process is delivered while the process is sleeping, this + * function will return without acquiring the mutex. + * + * Context: Process context. + * Return: 0 if the lock was successfully acquired or %-EINTR if a + * fatal signal arrived. + */ int __sched mutex_lock_killable(struct mutex *lock) { might_sleep(); @@ -1115,6 +1128,16 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); +/** + * mutex_lock_io() - Acquire the mutex and mark the process as waiting for I/O + * @lock: The mutex to be acquired. + * + * Lock the mutex like mutex_lock(). While the task is waiting for this + * mutex, it will be accounted as being in the IO wait state by the + * scheduler. + * + * Context: Process context. + */ void __sched mutex_lock_io(struct mutex *lock) { int token; -- cgit v1.2.3 From 2c2a9bbe7fecb2ad4981b6f4a56cacbfb849f848 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 12 Feb 2018 14:20:35 -0800 Subject: perf/x86/intel: Disable userspace RDPMC usage for large PEBS Userspace RDPMC cannot possibly work for large PEBS, which was introduced in: b8241d20699e ("perf/x86/intel: Implement batched PEBS interrupt handling (large PEBS interrupt threshold)") When the PEBS interrupt threshold is larger than one, there is no way to get exact auto-reload times and value for userspace RDPMC. Disable the userspace RDPMC usage when large PEBS is enabled. The only exception is when the PEBS interrupt threshold is 1, in which case user-space RDPMC works well even with auto-reload events. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Fixes: b8241d20699e ("perf/x86/intel: Implement batched PEBS interrupt handling (large PEBS interrupt threshold)") Link: http://lkml.kernel.org/r/1518474035-21006-6-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar (cherry picked from commit 1af22eba248efe2de25658041a80a3d40fb3e92e) --- arch/x86/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d33288e78..3d24edfef3e4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,8 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (READ_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc) && + !(event->hw.flags & PERF_X86_EVENT_FREERUNNING)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; -- cgit v1.2.3 From e5ea9b54a055619160bbfe527ebb7d7191823d66 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 17 Mar 2018 14:52:16 +0300 Subject: perf/x86/intel: Don't accidentally clear high bits in bdw_limit_period() We intended to clear the lowest 6 bits but because of a type bug we clear the high 32 bits as well. Andi says that periods are rarely more than U32_MAX so this bug probably doesn't have a huge runtime impact. Signed-off-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 294fe0f52a44 ("perf/x86/intel: Add INST_RETIRED.ALL workarounds") Link: http://lkml.kernel.org/r/20180317115216.GB4035@mwanda Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 56457cb73448..9b18a227fff7 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3194,7 +3194,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left) X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } -- cgit v1.2.3 From e340895c9ed0b44548f08bbaaee4afc7bfacd354 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 12 Mar 2018 08:41:34 -0700 Subject: perf/x86/intel/uncore: Add missing filter constraint for SKX CHA event Adding a filter constraint for Intel Skylake CHA event UNC_CHA_UPI_CREDITS_ACQUIRED (0x38). The event supports core-id/thread-id and link filtering. Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1520869294-14176-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 22ec65bc033a..0876798f2ac9 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3), EVENT_EXTRA_END }; -- cgit v1.2.3 From 578ae447e7e5d78c90ac40a06406c1741f79ba96 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Mar 2018 13:18:57 -0500 Subject: jump_label: Disable jump labels in __exit code With the following commit: 333522447063 ("jump_label: Explicitly disable jump labels in __init code") ... we explicitly disabled jump labels in __init code, so they could be detected and not warned about in the following commit: dc1dd184c2f0 ("jump_label: Warn on failed jump_label patching attempt") In-kernel __exit code has the same issue. It's never used, so it's freed along with the rest of initmem. But jump label entries in __exit code aren't explicitly disabled, so we get the following warning when enabling pr_debug() in __exit code: can't patch jump_label at dmi_sysfs_exit+0x0/0x2d WARNING: CPU: 0 PID: 22572 at kernel/jump_label.c:376 __jump_label_update+0x9d/0xb0 Fix the warning by disabling all jump labels in initmem (which includes both __init and __exit code). Reported-and-tested-by: Li Wang Signed-off-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: dc1dd184c2f0 ("jump_label: Warn on failed jump_label patching attempt") Link: http://lkml.kernel.org/r/7121e6e595374f06616c505b6e690e275c0054d1.1521483452.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 4 ++-- init/main.c | 2 +- kernel/jump_label.c | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2168cc6b8b30..b46b541c67c4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -151,7 +151,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); -extern void jump_label_invalidate_init(void); +extern void jump_label_invalidate_initmem(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -199,7 +199,7 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } -static inline void jump_label_invalidate_init(void) {} +static inline void jump_label_invalidate_initmem(void) {} static __always_inline bool static_key_false(struct static_key *key) { diff --git a/init/main.c b/init/main.c index 969eaf140ef0..21efbf6ace93 100644 --- a/init/main.c +++ b/init/main.c @@ -1001,7 +1001,7 @@ static int __ref kernel_init(void *unused) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); ftrace_free_init_mem(); - jump_label_invalidate_init(); + jump_label_invalidate_initmem(); free_initmem(); mark_readonly(); system_state = SYSTEM_RUNNING; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index e7214093dcd1..01ebdf1f9f40 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL @@ -421,15 +422,15 @@ void __init jump_label_init(void) cpus_read_unlock(); } -/* Disable any jump label entries in __init code */ -void __init jump_label_invalidate_init(void) +/* Disable any jump label entries in __init/__exit code */ +void __init jump_label_invalidate_initmem(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (init_kernel_text(iter->code)) + if (init_section_contains((void *)(unsigned long)iter->code, 1)) iter->code = 0; } } -- cgit v1.2.3 From 174afc3e7dd7823df8218e16e7768b834097184e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 12 Mar 2018 10:45:37 -0400 Subject: perf/x86/intel: Rename confusing 'freerunning PEBS' API and implementation to 'large PEBS' The 'freerunning PEBS' and 'large PEBS' are the same thing. Both of these names appear in the code and in the API, which causes confusion. Rename 'freerunning PEBS' to 'large PEBS' to unify the code, which eliminates the confusion. No functional change. Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1520865937-22910-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/events/intel/core.c | 12 ++++++------ arch/x86/events/intel/ds.c | 6 +++--- arch/x86/events/perf_event.h | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3d24edfef3e4..88797c80b3e0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2119,7 +2119,7 @@ static int x86_pmu_event_init(struct perf_event *event) } if (READ_ONCE(x86_pmu.attr_rdpmc) && - !(event->hw.flags & PERF_X86_EVENT_FREERUNNING)) + !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9b18a227fff7..1e41d7508d99 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2952,9 +2952,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event) return intel_pebs_aliases_precdist(event); } -static unsigned long intel_pmu_free_running_flags(struct perf_event *event) +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) { - unsigned long flags = x86_pmu.free_running_flags; + unsigned long flags = x86_pmu.large_pebs_flags; if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; @@ -2976,8 +2976,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!event->attr.freq) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_free_running_flags(event))) - event->hw.flags |= PERF_X86_EVENT_FREERUNNING; + ~intel_pmu_large_pebs_flags(event))) + event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3460,7 +3460,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32-bit width, @@ -3502,7 +3502,7 @@ static __initconst const struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c25ab28557..d8015235ba76 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs++; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs--; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -1530,7 +1530,7 @@ void __init intel_ds_init(void) x86_pmu.pebs_record_size = sizeof(struct pebs_record_skl); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.free_running_flags |= PERF_SAMPLE_TIME; + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; default: diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78f91ec1056e..39cd0615f04f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -69,7 +69,7 @@ struct event_constraint { #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ #define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ struct amd_nb { @@ -88,7 +88,7 @@ struct amd_nb { * REGS_USER can be handled for events limited to ring 3. * */ -#define PEBS_FREERUNNING_FLAGS \ +#define LARGE_PEBS_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ @@ -608,7 +608,7 @@ struct x86_pmu { struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); int max_pebs_events; - unsigned long free_running_flags; + unsigned long large_pebs_flags; /* * Intel LBR -- cgit v1.2.3 From 320b0651f32b830add6497fcdcfdcb6ae8c7b8a0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Mar 2018 11:51:34 -0700 Subject: perf/x86/intel/uncore: Fix multi-domain PCI CHA enumeration bug on Skylake servers The number of CHAs is miscalculated on multi-domain PCI Skylake server systems, resulting in an uncore driver initialization error. Gary Kroening explains: "For systems with a single PCI segment, it is sufficient to look for the bus number to change in order to determine that all of the CHa's have been counted for a single socket. However, for multi PCI segment systems, each socket is given a new segment and the bus number does NOT change. So looking only for the bus number to change ends up counting all of the CHa's on all sockets in the system. This leads to writing CPU MSRs beyond a valid range and causes an error in ivbep_uncore_msr_init_box()." To fix this bug, query the number of CHAs from the CAPID6 register: it should read bits 27:0 in the CAPID6 register located at Device 30, Function 3, Offset 0x9C. These 28 bits form a bit vector of available LLC slices and the CHAs that manage those slices. Reported-by: Kroening, Gary Tested-by: Kroening, Gary Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Shevchenko Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: abanman@hpe.com Cc: dimitri.sivanich@hpe.com Cc: hpa@zytor.com Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1520967094-13219-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 0876798f2ac9..c98b943e58b4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3563,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = { NULL, }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. + */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + static int skx_count_chabox(void) { - struct pci_dev *chabox_dev = NULL; - int bus, count = 0; + struct pci_dev *dev = NULL; + u32 val = 0; - while (1) { - chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); - if (!chabox_dev) - break; - if (count == 0) - bus = chabox_dev->bus->number; - if (bus != chabox_dev->bus->number) - break; - count++; - } + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; - pci_dev_put(chabox_dev); - return count; + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); } void skx_uncore_cpu_init(void) -- cgit v1.2.3 From c917e0f259908e75bd2a65877e25f9d90c22c848 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 12 Mar 2018 09:59:43 -0700 Subject: perf/cgroup: Fix child event counting bug When a perf_event is attached to parent cgroup, it should count events for all children cgroups: parent_group <---- perf_event \ - child_group <---- process(es) However, in our tests, we found this perf_event cannot report reliable results. Here is an example case: # create cgroups mkdir -p /sys/fs/cgroup/p/c # start perf for parent group perf stat -e instructions -G "p" # on another console, run test process in child cgroup: stressapptest -s 2 -M 1000 & echo $! > /sys/fs/cgroup/p/c/cgroup.procs # after the test process is done, stop perf in the first console shows instructions p The instruction should not be "not counted" as the process runs in the child cgroup. We found this is because perf_event->cgrp and cpuctx->cgrp are not identical, thus perf_event->cgrp are not updated properly. This patch fixes this by updating perf_cgroup properly for ancestor cgroup(s). Reported-by: Ephraim Park Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20180312165943.1057894-1-songliubraving@fb.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4b838470fac4..709a55b9ad97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -724,9 +724,15 @@ static inline void __update_cgrp_time(struct perf_cgroup *cgrp) static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { - struct perf_cgroup *cgrp_out = cpuctx->cgrp; - if (cgrp_out) - __update_cgrp_time(cgrp_out); + struct perf_cgroup *cgrp = cpuctx->cgrp; + struct cgroup_subsys_state *css; + + if (cgrp) { + for (css = &cgrp->css; css; css = css->parent) { + cgrp = container_of(css, struct perf_cgroup, css); + __update_cgrp_time(cgrp); + } + } } static inline void update_cgrp_time_from_event(struct perf_event *event) @@ -754,6 +760,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, { struct perf_cgroup *cgrp; struct perf_cgroup_info *info; + struct cgroup_subsys_state *css; /* * ctx->lock held by caller @@ -764,8 +771,12 @@ perf_cgroup_set_timestamp(struct task_struct *task, return; cgrp = perf_cgroup_from_task(task, ctx); - info = this_cpu_ptr(cgrp->info); - info->timestamp = ctx->timestamp; + + for (css = &cgrp->css; css; css = css->parent) { + cgrp = container_of(css, struct perf_cgroup, css); + info = this_cpu_ptr(cgrp->info); + info->timestamp = ctx->timestamp; + } } static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); -- cgit v1.2.3 From a8c024cd9b9683d25ae1f459525dd2c6bec75e79 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Mon, 19 Mar 2018 14:35:54 -0400 Subject: sched/debug: Fix per-task line continuation for console output When the SEQ_printf() macro prints to the console, it runs a simple printk() without KERN_CONT "continued" line printing. The result of this is oddly wrapped task info, for example: % echo t > /proc/sysrq-trigger % dmesg ... runnable tasks: ... [ 29.608611] I [ 29.608613] rcu_sched 8 3252.013846 4087 120 [ 29.608614] 0.000000 29.090111 0.000000 [ 29.608615] 0 0 [ 29.608616] / Modify SEQ_printf to use pr_cont() for expected one-line results: % echo t > /proc/sysrq-trigger % dmesg ... runnable tasks: ... [ 106.716329] S cpuhp/5 37 2006.315026 14 120 0.000000 0.496893 0.000000 0 0 / Signed-off-by: Joe Lawrence Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1521484555-8620-2-git-send-email-joe.lawrence@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1ca0130ed4f9..50026aa2d81e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(sched_debug_lock); if (m) \ seq_printf(m, x); \ else \ - printk(x); \ + pr_cont(x); \ } while (0) /* -- cgit v1.2.3 From e9ca267096674eadd1fd479279bcb58df1486049 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Mon, 19 Mar 2018 14:35:55 -0400 Subject: sched/debug: Adjust newlines for better alignment Scheduler debug stats include newlines that display out of alignment when prefixed by timestamps. For example, the dmesg utility: % echo t > /proc/sysrq-trigger % dmesg ... [ 83.124251] runnable tasks: S task PID tree-key switches prio wait-time sum-exec sum-sleep ----------------------------------------------------------------------------------------------------------- At the same time, some syslog utilities (like rsyslog by default) don't like the additional newlines control characters, saving lines like this to /var/log/messages: Mar 16 16:02:29 localhost kernel: #012runnable tasks:#012 S task PID tree-key ... ^^^^ ^^^^ Clean these up by moving newline characters to their own SEQ_printf invocation. This leaves the /proc/sched_debug unchanged, but brings the entire output into alignment when prefixed: % echo t > /proc/sysrq-trigger % dmesg ... [ 62.410368] runnable tasks: [ 62.410368] S task PID tree-key switches prio wait-time sum-exec sum-sleep [ 62.410369] ----------------------------------------------------------------------------------------------------------- [ 62.410369] I kworker/u12:0 5 1932.215593 332 120 0.000000 3.621252 0.000000 0 0 / and no escaped control characters from rsyslog in /var/log/messages: Mar 16 16:15:06 localhost kernel: runnable tasks: Mar 16 16:15:06 localhost kernel: S task PID tree-key ... Signed-off-by: Joe Lawrence Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1521484555-8620-3-git-send-email-joe.lawrence@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 50026aa2d81e..72c401b3b15c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -501,12 +501,12 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) { struct task_struct *g, *p; - SEQ_printf(m, - "\nrunnable tasks:\n" - " S task PID tree-key switches prio" - " wait-time sum-exec sum-sleep\n" - "-------------------------------------------------------" - "----------------------------------------------------\n"); + SEQ_printf(m, "\n"); + SEQ_printf(m, "runnable tasks:\n"); + SEQ_printf(m, " S task PID tree-key switches prio" + " wait-time sum-exec sum-sleep\n"); + SEQ_printf(m, "-------------------------------------------------------" + "----------------------------------------------------\n"); rcu_read_lock(); for_each_process_thread(g, p) { @@ -527,9 +527,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) unsigned long flags; #ifdef CONFIG_FAIR_GROUP_SCHED - SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); + SEQ_printf(m, "\n"); + SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); #else - SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); + SEQ_printf(m, "\n"); + SEQ_printf(m, "cfs_rq[%d]:\n", cpu); #endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -595,9 +597,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #ifdef CONFIG_RT_GROUP_SCHED - SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); + SEQ_printf(m, "\n"); + SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); #else - SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); + SEQ_printf(m, "\n"); + SEQ_printf(m, "rt_rq[%d]:\n", cpu); #endif #define P(x) \ @@ -624,7 +628,8 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) { struct dl_bw *dl_bw; - SEQ_printf(m, "\ndl_rq[%d]:\n", cpu); + SEQ_printf(m, "\n"); + SEQ_printf(m, "dl_rq[%d]:\n", cpu); #define PU(x) \ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x)) -- cgit v1.2.3 From 5927145efd5de71976e62e2822511b13014d7e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:13 +0100 Subject: x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk There were only a few Pentium Pro multiprocessors systems where this errata applied. They are more than 20 years old now, and we've slowly dropped places which put the workarounds in and discouraged anyone from enabling the workaround. Get rid of it for good. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-2-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 13 ------------- arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2 -- arch/x86/include/asm/barrier.h | 30 ----------------------------- arch/x86/include/asm/io.h | 15 --------------- arch/x86/kernel/pci-nommu.c | 19 ------------------ arch/x86/um/asm/barrier.h | 4 ---- 6 files changed, 83 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 65a9a4716e34..f0c5ef578153 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -config X86_PPRO_FENCE - bool "PentiumPro memory ordering errata workaround" - depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1 - ---help--- - Old PentiumPro multiprocessor systems had errata that could cause - memory operations to violate the x86 ordering standard in rare cases. - Enabling this option will attempt to work around some (but not all) - occurrences of this problem, at the cost of much heavier spinlock and - memory barrier operations. - - If unsure, say n here. Even distro kernels should think twice before - enabling this: there are few systems, and an unlikely bug. - config X86_F00F_BUG def_bool y depends on M586MMX || M586TSC || M586 || M486 diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 7780bbfb06ef..9242b28418d5 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -5,8 +5,6 @@ #undef CONFIG_OPTIMIZE_INLINING #endif -#undef CONFIG_X86_PPRO_FENCE - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e1259f043ae9..042b5e892ed1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else #define dma_rmb() barrier() -#endif #define dma_wmb() barrier() #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. - */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - ___p1; \ -}) - -#else /* regular x86 TSO memory ordering */ - #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ @@ -107,8 +79,6 @@ do { \ ___p1; \ }) -#endif - /* Atomic operations are already serializing on x86 */ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e948627fd0..f6e5b9375d8c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void); */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) - asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 618285e475c6..ac7ea3a8242f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) return NOMMU_MAPPING_ERROR; - flush_write_buffers(); return bus; } @@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } - flush_write_buffers(); return nents; } -static void nommu_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == NOMMU_MAPPING_ERROR; @@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = { .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, - .sync_single_for_device = nommu_sync_single_for_device, - .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, .mapping_error = nommu_mapping_error, .dma_supported = x86_dma_supported, diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b7d73400ea29..f31e5d903161 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -30,11 +30,7 @@ #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else /* CONFIG_X86_PPRO_FENCE */ #define dma_rmb() barrier() -#endif /* CONFIG_X86_PPRO_FENCE */ #define dma_wmb() barrier() #include -- cgit v1.2.3 From 8250e6cadceddefb8ab32eb7a011b3201adebbb3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Mar 2018 23:48:09 +0100 Subject: drm/sun4i: hdmi: Fix an error handling path in 'sun4i_hdmi_bind()' If we can not allocate the HDMI encoder regmap, we still need to free some resources before returning. Fixes: 4b1c924b1fc1 ("drm/sun4i: hdmi: create a regmap for later use") Reviewed-by: Chen-Yu Tsai Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/14c42391e1b562c7495bda6ad6fa1d24ec8dc052.1521413031.git.christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 500b6fb3e028..d2839727bb0b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -538,7 +538,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, &sun4i_hdmi_regmap_config); if (IS_ERR(hdmi->regmap)) { dev_err(dev, "Couldn't create HDMI encoder regmap\n"); - return PTR_ERR(hdmi->regmap); + ret = PTR_ERR(hdmi->regmap); + goto err_disable_mod_clk; } ret = sun4i_tmds_create(hdmi); -- cgit v1.2.3 From 1bc659eb2369ab795a401ff82f7e3a87a46bc864 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Mar 2018 23:48:10 +0100 Subject: drm/sun4i: hdmi: Fix another error handling path in 'sun4i_hdmi_bind()' If we can not get the HDMI DDC clock, we still need to free some resources before returning. Fixes: 939d749ad664 ("drm/sun4i: hdmi: Add support for controller hardware variants") Reviewed-by: Chen-Yu Tsai Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/5e0084af4ad57e9eea3bca5bd8e2e95970cd6714.1521413031.git.christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index d2839727bb0b..fa4bcd092eaf 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -552,7 +552,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, hdmi->ddc_parent_clk = devm_clk_get(dev, "ddc"); if (IS_ERR(hdmi->ddc_parent_clk)) { dev_err(dev, "Couldn't get the HDMI DDC clock\n"); - return PTR_ERR(hdmi->ddc_parent_clk); + ret = PTR_ERR(hdmi->ddc_parent_clk); + goto err_disable_mod_clk; } } else { hdmi->ddc_parent_clk = hdmi->tmds_clk; -- cgit v1.2.3 From 31ad7f8e7dc94d3b85ccf9b6141ce6dfd35a1781 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 19 Mar 2018 10:31:54 -0400 Subject: x86/vsyscall/64: Use proper accessor to update P4D entry Writing to it directly does not work for Xen PV guests. Fixes: 49275fef986a ("x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy") Signed-off-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180319143154.3742-1-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 8560ef68a9d6..317be365bce3 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -347,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root) set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 - p4d->p4d |= _PAGE_USER; + set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); #endif pud = pud_offset(p4d, VSYSCALL_ADDR); set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); -- cgit v1.2.3 From 8137a8e2190975ed9060111ce13000792360aba3 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Sat, 17 Mar 2018 18:17:58 -0700 Subject: vmxnet3: remove unused flag "rxcsum" from struct vmxnet3_adapter Signed-off-by: Igor Pylypiv Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_int.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 99387a4a20a8..59ec34052a65 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -342,8 +342,6 @@ struct vmxnet3_adapter { u8 __iomem *hw_addr1; /* for BAR 1 */ u8 version; - bool rxcsum; - #ifdef VMXNET3_RSS struct UPT1_RSSConf *rss_conf; bool rss; -- cgit v1.2.3 From 7fe4d6dcbcb43fe0282d4213fc52be178bb30e91 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 18 Mar 2018 17:37:22 +0200 Subject: devlink: Remove redundant free on error path The current code performs unneeded free. Remove the redundant skb freeing during the error path. Fixes: 1555d204e743 ("devlink: Support for pipeline debug (dpipe)") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 2f2307d94787..effd4848c2b4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1798,7 +1798,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } @@ -1807,7 +1807,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; @@ -2073,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info, table->counters_enabled, &dump_ctx); if (err) - goto err_entries_dump; + return err; send_done: nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, @@ -2081,16 +2080,10 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(dump_ctx.skb, info); - -err_entries_dump: -err_skb_send_alloc: - genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); - nlmsg_free(dump_ctx.skb); - return err; } static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, @@ -2229,7 +2222,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(skb, info); @@ -2237,7 +2230,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; -- cgit v1.2.3 From 00777fac28ba3e126b9e63e789a613e8bd2cab25 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Mar 2018 23:59:36 +0100 Subject: net: ethernet: arc: Fix a potential memory leak if an optional regulator is deferred If the optional regulator is deferred, we must release some resources. They will be re-allocated when the probe function will be called again. Fixes: 6eacf31139bf ("ethernet: arc: Add support for Rockchip SoC layer device tree bindings") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_rockchip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 16f9bee992fe..0f6576802607 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -169,8 +169,10 @@ static int emac_rockchip_probe(struct platform_device *pdev) /* Optional regulator for PHY */ priv->regulator = devm_regulator_get_optional(dev, "phy"); if (IS_ERR(priv->regulator)) { - if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out_clk_disable; + } dev_err(dev, "no regulator found\n"); priv->regulator = NULL; } -- cgit v1.2.3 From 44caebd368a5174f8aa7b80076350385eb2c2f42 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Sun, 18 Mar 2018 23:40:51 -0700 Subject: net: gemini: fix memory leak cppcheck report: [drivers/net/ethernet/cortina/gemini.c:543]: (error) Memory leak: skb_tab Signed-off-by: Igor Pylypiv Acked-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 5eb999af2c40..bd3f6e4d1341 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -540,6 +540,7 @@ static int gmac_setup_txqs(struct net_device *netdev) if (port->txq_dma_base & ~DMA_Q_BASE_MASK) { dev_warn(geth->dev, "TX queue base it not aligned\n"); + kfree(skb_tab); return -ENOMEM; } -- cgit v1.2.3 From 5f2fb802eee1df0810b47ea251942fe3fd36589a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 19 Mar 2018 11:24:58 +0100 Subject: ipv6: old_dport should be a __be16 in __ip6_datagram_connect() Fixes: 2f987a76a977 ("net: ipv6: keep sk status consistent after datagram connect failure") Signed-off-by: Stefano Brivio Acked-by: Paolo Abeni Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8a9ac2d0f5d3..a9f7eca0b6a3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -149,7 +149,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, struct in6_addr *daddr, old_daddr; __be32 fl6_flowlabel = 0; __be32 old_fl6_flowlabel; - __be32 old_dport; + __be16 old_dport; int addr_type; int err; -- cgit v1.2.3 From e8980d67d6017c8eee8f9c35f782c4bd68e004c9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Mar 2018 17:05:13 +0200 Subject: RDMA/ucma: Ensure that CM_ID exists prior to access it Prior to access UCMA commands, the context should be initialized and connected to CM_ID with ucma_create_id(). In case user skips this step, he can provide non-valid ctx without CM_ID and cause to multiple NULL dereferences. Also there are situations where the create_id can be raced with other user access, ensure that the context is only shared to other threads once it is fully initialized to avoid the races. [ 109.088108] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 109.090315] IP: ucma_connect+0x138/0x1d0 [ 109.092595] PGD 80000001dc02d067 P4D 80000001dc02d067 PUD 1da9ef067 PMD 0 [ 109.095384] Oops: 0000 [#1] SMP KASAN PTI [ 109.097834] CPU: 0 PID: 663 Comm: uclose Tainted: G B 4.16.0-rc1-00062-g2975d5de6428 #45 [ 109.100816] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 109.105943] RIP: 0010:ucma_connect+0x138/0x1d0 [ 109.108850] RSP: 0018:ffff8801c8567a80 EFLAGS: 00010246 [ 109.111484] RAX: 0000000000000000 RBX: 1ffff100390acf50 RCX: ffffffff9d7812e2 [ 109.114496] RDX: 1ffffffff3f507a5 RSI: 0000000000000297 RDI: 0000000000000297 [ 109.117490] RBP: ffff8801daa15600 R08: 0000000000000000 R09: ffffed00390aceeb [ 109.120429] R10: 0000000000000001 R11: ffffed00390aceea R12: 0000000000000000 [ 109.123318] R13: 0000000000000120 R14: ffff8801de6459c0 R15: 0000000000000118 [ 109.126221] FS: 00007fabb68d6700(0000) GS:ffff8801e5c00000(0000) knlGS:0000000000000000 [ 109.129468] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 109.132523] CR2: 0000000000000020 CR3: 00000001d45d8003 CR4: 00000000003606b0 [ 109.135573] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 109.138716] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 109.142057] Call Trace: [ 109.144160] ? ucma_listen+0x110/0x110 [ 109.146386] ? wake_up_q+0x59/0x90 [ 109.148853] ? futex_wake+0x10b/0x2a0 [ 109.151297] ? save_stack+0x89/0xb0 [ 109.153489] ? _copy_from_user+0x5e/0x90 [ 109.155500] ucma_write+0x174/0x1f0 [ 109.157933] ? ucma_resolve_route+0xf0/0xf0 [ 109.160389] ? __mod_node_page_state+0x1d/0x80 [ 109.162706] __vfs_write+0xc4/0x350 [ 109.164911] ? kernel_read+0xa0/0xa0 [ 109.167121] ? path_openat+0x1b10/0x1b10 [ 109.169355] ? fsnotify+0x899/0x8f0 [ 109.171567] ? fsnotify_unmount_inodes+0x170/0x170 [ 109.174145] ? __fget+0xa8/0xf0 [ 109.177110] vfs_write+0xf7/0x280 [ 109.179532] SyS_write+0xa1/0x120 [ 109.181885] ? SyS_read+0x120/0x120 [ 109.184482] ? compat_start_thread+0x60/0x60 [ 109.187124] ? SyS_read+0x120/0x120 [ 109.189548] do_syscall_64+0xeb/0x250 [ 109.192178] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 109.194725] RIP: 0033:0x7fabb61ebe99 [ 109.197040] RSP: 002b:00007fabb68d5e98 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 109.200294] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fabb61ebe99 [ 109.203399] RDX: 0000000000000120 RSI: 00000000200001c0 RDI: 0000000000000004 [ 109.206548] RBP: 00007fabb68d5ec0 R08: 0000000000000000 R09: 0000000000000000 [ 109.209902] R10: 0000000000000000 R11: 0000000000000202 R12: 00007fabb68d5fc0 [ 109.213327] R13: 0000000000000000 R14: 00007fff40ab2430 R15: 00007fabb68d69c0 [ 109.216613] Code: 88 44 24 2c 0f b6 84 24 6e 01 00 00 88 44 24 2d 0f b6 84 24 69 01 00 00 88 44 24 2e 8b 44 24 60 89 44 24 30 e8 da f6 06 ff 31 c0 <66> 41 83 7c 24 20 1b 75 04 8b 44 24 64 48 8d 74 24 20 4c 89 e7 [ 109.223602] RIP: ucma_connect+0x138/0x1d0 RSP: ffff8801c8567a80 [ 109.226256] CR2: 0000000000000020 Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Reported-by: Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 14c260ed63cb..e5a1e7d81326 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -132,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id, ctx = idr_find(&ctx_idr, id); if (!ctx) ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) + else if (ctx->file != file || !ctx->cm_id) ctx = ERR_PTR(-EINVAL); return ctx; } @@ -456,6 +456,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; + struct rdma_cm_id *cm_id; enum ib_qp_type qp_type; int ret; @@ -476,10 +477,10 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, - ucma_event_handler, ctx, cmd.ps, qp_type); - if (IS_ERR(ctx->cm_id)) { - ret = PTR_ERR(ctx->cm_id); + cm_id = rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); goto err1; } @@ -489,10 +490,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ret = -EFAULT; goto err2; } + + ctx->cm_id = cm_id; return 0; err2: - rdma_destroy_id(ctx->cm_id); + rdma_destroy_id(cm_id); err1: mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); -- cgit v1.2.3 From 32d43cd391bacb5f0814c2624399a5dad3501d09 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Mar 2018 12:16:59 -0700 Subject: kvm/x86: fix icebp instruction handling The undocumented 'icebp' instruction (aka 'int1') works pretty much like 'int3' in the absense of in-circuit probing equipment (except, obviously, that it raises #DB instead of raising #BP), and is used by some validation test-suites as such. But Andy Lutomirski noticed that his test suite acted differently in kvm than on bare hardware. The reason is that kvm used an inexact test for the icebp instruction: it just assumed that an all-zero VM exit qualification value meant that the VM exit was due to icebp. That is not unlike the guess that do_debug() does for the actual exception handling case, but it's purely a heuristic, not an absolute rule. do_debug() does it because it wants to ascribe _some_ reasons to the #DB that happened, and an empty %dr6 value means that 'icebp' is the most likely casue and we have no better information. But kvm can just do it right, because unlike the do_debug() case, kvm actually sees the real reason for the #DB in the VM-exit interruption information field. So instead of relying on an inexact heuristic, just use the actual VM exit information that says "it was 'icebp'". Right now the 'icebp' instruction isn't technically documented by Intel, but that will hopefully change. The special "privileged software exception" information _is_ actually mentioned in the Intel SDM, even though the cause of it isn't enumerated. Reported-by: Andy Lutomirski Tested-by: Paolo Bonzini Signed-off-by: Linus Torvalds --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8b6780751132..5db8b0b10766 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 051dab74e4e9..2d87603f9179 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1045,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info) (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } +/* Undocumented: icebp/int1 */ +static inline bool is_icebp(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) + == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); +} + static inline bool cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; @@ -6179,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (!(dr6 & ~DR6_RESERVED)) /* icebp */ + if (is_icebp(intr_info)) skip_emulated_instruction(vcpu); kvm_queue_exception(vcpu, DB_VECTOR); -- cgit v1.2.3 From 52fda36d63bfc8c8e8ae5eda8eb5ac6f52cd67ed Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Mar 2018 09:58:51 -0300 Subject: test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches Function bpf_fill_maxinsns11 is designed to not be able to be JITed on x86_64. So, it fails when CONFIG_BPF_JIT_ALWAYS_ON=y, and commit 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") makes sure that failure is detected on that case. However, it does not fail on other architectures, which have a different JIT compiler design. So, test_bpf has started to fail to load on those. After this fix, test_bpf loads fine on both x86_64 and ppc64el. Fixes: 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Yonghong Song Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2efb213716fa..3e9335493fe4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5467,7 +5467,7 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, -#ifdef CONFIG_BPF_JIT_ALWAYS_ON +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, #else CLASSIC | FLAG_NO_DATA, -- cgit v1.2.3 From f005afede992e265bb98534b86912bb669ccd0d2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 20 Mar 2018 11:19:17 -0700 Subject: trace/bpf: remove helper bpf_perf_prog_read_value from tracepoint type programs Commit 4bebdc7a85aa ("bpf: add helper bpf_perf_prog_read_value") added helper bpf_perf_prog_read_value so that perf_event type program can read event counter and enabled/running time. This commit, however, introduced a bug which allows this helper for tracepoint type programs. This is incorrect as bpf_perf_prog_read_value needs to access perf_event through its bpf_perf_event_data_kern type context, which is not available for tracepoint type program. This patch fixed the issue by separating bpf_func_proto between tracepoint and perf_event type programs and removed bpf_perf_prog_read_value from tracepoint func prototype. Fixes: 4bebdc7a85aa ("bpf: add helper bpf_perf_prog_read_value") Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 68 ++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c0a9e310d715..01e6b3a38871 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -661,7 +661,41 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { .arg3_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx, +static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_tp; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto_tp; + default: + return tracing_func_proto(func_id); + } +} + +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + + BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); + return true; +} + +const struct bpf_verifier_ops tracepoint_verifier_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = tp_prog_is_valid_access, +}; + +const struct bpf_prog_ops tracepoint_prog_ops = { +}; + +BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx, struct bpf_perf_event_value *, buf, u32, size) { int err = -EINVAL; @@ -678,8 +712,8 @@ clear: return err; } -static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { - .func = bpf_perf_prog_read_value_tp, +static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { + .func = bpf_perf_prog_read_value, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -687,7 +721,7 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { .arg3_type = ARG_CONST_SIZE, }; -static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_perf_event_output: @@ -695,34 +729,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; case BPF_FUNC_perf_prog_read_value: - return &bpf_perf_prog_read_value_proto_tp; + return &bpf_perf_prog_read_value_proto; default: return tracing_func_proto(func_id); } } -static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - - BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); - return true; -} - -const struct bpf_verifier_ops tracepoint_verifier_ops = { - .get_func_proto = tp_prog_func_proto, - .is_valid_access = tp_prog_is_valid_access, -}; - -const struct bpf_prog_ops tracepoint_prog_ops = { -}; - static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -779,7 +791,7 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, } const struct bpf_verifier_ops perf_event_verifier_ops = { - .get_func_proto = tp_prog_func_proto, + .get_func_proto = pe_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; -- cgit v1.2.3 From 0fa4fe85f4724fff89b09741c437cbee9cf8b008 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Mon, 19 Mar 2018 17:57:27 -0700 Subject: bpf: skip unnecessary capability check The current check statement in BPF syscall will do a capability check for CAP_SYS_ADMIN before checking sysctl_unprivileged_bpf_disabled. This code path will trigger unnecessary security hooks on capability checking and cause false alarms on unprivileged process trying to get CAP_SYS_ADMIN access. This can be resolved by simply switch the order of the statement and CAP_SYS_ADMIN is not required anyway if unprivileged bpf syscall is allowed. Signed-off-by: Chenbo Feng Acked-by: Lorenzo Colitti Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e24aa3241387..43f95d190eea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1845,7 +1845,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) + if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) return -EPERM; err = check_uarg_tail_zero(uattr, sizeof(attr), size); -- cgit v1.2.3 From 87e0d4f0f37fb0c8c4aeeac46fff5e957738df79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Mar 2018 01:18:24 +0100 Subject: kbuild: disable clang's default use of -fmerge-all-constants Prasad reported that he has seen crashes in BPF subsystem with netd on Android with arm64 in the form of (note, the taint is unrelated): [ 4134.721483] Unable to handle kernel paging request at virtual address 800000001 [ 4134.820925] Mem abort info: [ 4134.901283] Exception class = DABT (current EL), IL = 32 bits [ 4135.016736] SET = 0, FnV = 0 [ 4135.119820] EA = 0, S1PTW = 0 [ 4135.201431] Data abort info: [ 4135.301388] ISV = 0, ISS = 0x00000021 [ 4135.359599] CM = 0, WnR = 0 [ 4135.470873] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffe39b946000 [ 4135.499757] [0000000800000001] *pgd=0000000000000000, *pud=0000000000000000 [ 4135.660725] Internal error: Oops: 96000021 [#1] PREEMPT SMP [ 4135.674610] Modules linked in: [ 4135.682883] CPU: 5 PID: 1260 Comm: netd Tainted: G S W 4.14.19+ #1 [ 4135.716188] task: ffffffe39f4aa380 task.stack: ffffff801d4e0000 [ 4135.731599] PC is at bpf_prog_add+0x20/0x68 [ 4135.741746] LR is at bpf_prog_inc+0x20/0x2c [ 4135.751788] pc : [] lr : [] pstate: 60400145 [ 4135.769062] sp : ffffff801d4e3ce0 [...] [ 4136.258315] Process netd (pid: 1260, stack limit = 0xffffff801d4e0000) [ 4136.273746] Call trace: [...] [ 4136.442494] 3ca0: ffffff94ab7ad584 0000000060400145 ffffffe3a01bf8f8 0000000000000006 [ 4136.460936] 3cc0: 0000008000000000 ffffff94ab844204 ffffff801d4e3cf0 ffffff94ab7ad584 [ 4136.479241] [] bpf_prog_add+0x20/0x68 [ 4136.491767] [] bpf_prog_inc+0x20/0x2c [ 4136.504536] [] bpf_obj_get_user+0x204/0x22c [ 4136.518746] [] SyS_bpf+0x5a8/0x1a88 Android's netd was basically pinning the uid cookie BPF map in BPF fs (/sys/fs/bpf/traffic_cookie_uid_map) and later on retrieving it again resulting in above panic. Issue is that the map was wrongly identified as a prog! Above kernel was compiled with clang 4.0, and it turns out that clang decided to merge the bpf_prog_iops and bpf_map_iops into a single memory location, such that the two i_ops could then not be distinguished anymore. Reason for this miscompilation is that clang has the more aggressive -fmerge-all-constants enabled by default. In fact, clang source code has a comment about it in lib/AST/ExprConstant.cpp on why it is okay to do so: Pointers with different bases cannot represent the same object. (Note that clang defaults to -fmerge-all-constants, which can lead to inconsistent results for comparisons involving the address of a constant; this generally doesn't matter in practice.) The issue never appeared with gcc however, since gcc does not enable -fmerge-all-constants by default and even *explicitly* states in it's option description that using this flag results in non-conforming behavior, quote from man gcc: Languages like C or C++ require each variable, including multiple instances of the same variable in recursive calls, to have distinct locations, so using this option results in non-conforming behavior. There are also various clang bug reports open on that matter [1], where clang developers acknowledge the non-conforming behavior, and refer to disabling it with -fno-merge-all-constants. But even if this gets fixed in clang today, there are already users out there that triggered this. Thus, fix this issue by explicitly adding -fno-merge-all-constants to the kernel's Makefile to generically disable this optimization, since potentially other places in the kernel could subtly break as well. Note, there is also a flag called -fmerge-constants (not supported by clang), which is more conservative and only applies to strings and it's enabled in gcc's -O/-O2/-O3/-Os optimization levels. In gcc's code, the two flags -fmerge-{all-,}constants share the same variable internally, so when disabling it via -fno-merge-all-constants, then we really don't merge any const data (e.g. strings), and text size increases with gcc (14,927,214 -> 14,942,646 for vmlinux.o). $ gcc -fverbose-asm -O2 foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants foo.c -S -o foo.S -> foo.S doesn't list -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants -fmerge-constants foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled Thus, as a workaround we need to set both -fno-merge-all-constants *and* -fmerge-constants in the Makefile in order for text size to stay as is. [1] https://bugs.llvm.org/show_bug.cgi?id=18538 Reported-by: Prasad Sodagudi Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: Chenbo Feng Cc: Richard Smith Cc: Chandler Carruth Cc: linux-kernel@vger.kernel.org Tested-by: Prasad Sodagudi Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Makefile b/Makefile index c4322dea3ca2..af07bf5bb238 100644 --- a/Makefile +++ b/Makefile @@ -826,6 +826,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# clang sets -fmerge-all-constants by default as optimization, but this +# is non-conforming behavior for C and in fact breaks the kernel, so we +# need to disable it here generally. +KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants) + +# for gcc -fno-merge-all-constants disables everything, but it is fine +# to have actual conforming behavior enabled. +KBUILD_CFLAGS += $(call cc-option,-fmerge-constants) + # Make sure -fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) -- cgit v1.2.3 From 8bfac12f88dbafff40af72c7990b7d14e0158545 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 7 Mar 2018 10:49:23 -0500 Subject: drm/amd/display: Allow truncation to 10 bits The truncation isn't being programmed if the truncation depth is set to 2, it causes an issue with dce11.2 asic using 6bit eDP panel. It required to truncate 12:10 in order to perform spatial dither 10:6. This change will allow 12:10 truncation to be enabled. Signed-off-by: Mikita Lipski Reviewed-by: Jun Lei Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_opp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c index 3931412ab6d3..f19de13024f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c @@ -128,8 +128,7 @@ static void set_truncation( return; } /* on other format-to do */ - if (params->flags.TRUNCATE_ENABLED == 0 || - params->flags.TRUNCATE_DEPTH == 2) + if (params->flags.TRUNCATE_ENABLED == 0) return; /*Set truncation depth and Enable truncation*/ REG_UPDATE_3(FMT_BIT_DEPTH_CONTROL, @@ -144,7 +143,7 @@ static void set_truncation( /** * set_spatial_dither * 1) set spatial dithering mode: pattern of seed - * 2) set spatical dithering depth: 0 for 18bpp or 1 for 24bpp + * 2) set spatial dithering depth: 0 for 18bpp or 1 for 24bpp * 3) set random seed * 4) set random mode * lfsr is reset every frame or not reset -- cgit v1.2.3 From 4407a29bad96a773e1ddd380ecab0828b5656b0a Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 7 Mar 2018 11:12:20 -0500 Subject: drm/amd/display: Fix FMT truncation programming Switch the order of parameters being set for depth and mode of truncation, as it previously was not correct Signed-off-by: Mikita Lipski Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_opp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c index f19de13024f8..87093894ea9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c @@ -134,9 +134,9 @@ static void set_truncation( REG_UPDATE_3(FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1, FMT_TRUNCATE_DEPTH, - params->flags.TRUNCATE_MODE, + params->flags.TRUNCATE_DEPTH, FMT_TRUNCATE_MODE, - params->flags.TRUNCATE_DEPTH); + params->flags.TRUNCATE_MODE); } -- cgit v1.2.3 From 509648fcf0ce8650184649b43ad039f78dde155f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 6 Mar 2018 11:14:12 -0500 Subject: drm/amd/display: We shouldn't set format_default on plane as atomic driver This is still a leftover from early atomic brinup days. Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 95b639eddcb6..63c67346d316 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3134,8 +3134,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, switch (aplane->base.type) { case DRM_PLANE_TYPE_PRIMARY: - aplane->base.format_default = true; - res = drm_universal_plane_init( dm->adev->ddev, &aplane->base, -- cgit v1.2.3 From 731a373698c9675d5aed8a30d8c9861bea9c41a2 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 7 Mar 2018 13:45:33 -0500 Subject: drm/amd/display: Add one to EDID's audio channel count when passing to DC DC takes channel count to mean the actual count. cea_sad's channels represent it as number of channels - 1. Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 9bd142f65f9b..e1acc10e35a2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -109,7 +109,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps( struct cea_sad *sad = &sads[i]; edid_caps->audio_modes[i].format_code = sad->format; - edid_caps->audio_modes[i].channel_count = sad->channels; + edid_caps->audio_modes[i].channel_count = sad->channels + 1; edid_caps->audio_modes[i].sample_rate = sad->freq; edid_caps->audio_modes[i].sample_size = sad->byte2; } -- cgit v1.2.3 From fa8e6d58c5bc260f4369c6699683d69695daed0a Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Wed, 21 Mar 2018 04:31:36 +0530 Subject: mtd: nand: fsl_ifc: Fix nand waitfunc return value As per the IFC hardware manual, Most significant 2 bytes in nand_fsr register are the outcome of NAND READ STATUS command. So status value need to be shifted and aligned as per the nand framework requirement. Fixes: 82771882d960 ("NAND Machine support for Integrated Flash Controller") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Jagdish Gediya Reviewed-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon --- drivers/mtd/nand/fsl_ifc_nand.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 4872a7ba6503..b6a3ba445cfb 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -626,6 +626,7 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; u32 nand_fsr; + int status; /* Use READ_STATUS command, but wait for the device to be ready */ ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | @@ -640,12 +641,12 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) fsl_ifc_run_command(mtd); nand_fsr = ifc_in32(&ifc->ifc_nand.nand_fsr); - + status = nand_fsr >> 24; /* * The chip always seems to report that it is * write-protected, even when it is not. */ - return nand_fsr | NAND_STATUS_WP; + return status | NAND_STATUS_WP; } /* -- cgit v1.2.3 From 843c3a59997f18060848b8632607dd04781b52d1 Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Wed, 21 Mar 2018 05:51:46 +0530 Subject: mtd: nand: fsl_ifc: Fix eccstat array overflow for IFC ver >= 2.0.0 Number of ECC status registers i.e. (ECCSTATx) has been increased in IFC version 2.0.0 due to increase in SRAM size. This is causing eccstat array to over flow. So, replace eccstat array with u32 variable to make it fail-safe and independent of number of ECC status registers or SRAM size. Fixes: bccb06c353af ("mtd: nand: ifc: update bufnum mask for ver >= 2.0.0") Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jagdish Gediya Signed-off-by: Boris Brezillon --- drivers/mtd/nand/fsl_ifc_nand.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index b6a3ba445cfb..fae01b04abc7 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -173,14 +173,9 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob) /* returns nonzero if entire page is blank */ static int check_read_ecc(struct mtd_info *mtd, struct fsl_ifc_ctrl *ctrl, - u32 *eccstat, unsigned int bufnum) + u32 eccstat, unsigned int bufnum) { - u32 reg = eccstat[bufnum / 4]; - int errors; - - errors = (reg >> ((3 - bufnum % 4) * 8)) & 15; - - return errors; + return (eccstat >> ((3 - bufnum % 4) * 8)) & 15; } /* @@ -193,7 +188,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl; struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; - u32 eccstat[4]; + u32 eccstat; int i; /* set the chip select for NAND Transaction */ @@ -228,8 +223,8 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) if (nctrl->eccread) { int errors; int bufnum = nctrl->page & priv->bufnum_mask; - int sector = bufnum * chip->ecc.steps; - int sector_end = sector + chip->ecc.steps - 1; + int sector_start = bufnum * chip->ecc.steps; + int sector_end = sector_start + chip->ecc.steps - 1; __be32 *eccstat_regs; if (ctrl->version >= FSL_IFC_VERSION_2_0_0) @@ -237,10 +232,12 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) else eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; - for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = ifc_in32(&eccstat_regs[i]); + eccstat = ifc_in32(&eccstat_regs[sector_start / 4]); + + for (i = sector_start; i <= sector_end; i++) { + if (i != sector_start && !(i % 4)) + eccstat = ifc_in32(&eccstat_regs[i / 4]); - for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); if (errors == 15) { -- cgit v1.2.3 From 6b00c35138b404be98b85f4a703be594cbed501c Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Thu, 22 Mar 2018 01:08:10 +0530 Subject: mtd: nand: fsl_ifc: Read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0 Due to missing information in Hardware manual, current implementation doesn't read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0. Add support to read ECCSTAT0 and ECCSTAT1 registers during ecccheck for IFC 2.0. Fixes: 656441478ed5 ("mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Jagdish Gediya Reviewed-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon --- drivers/mtd/nand/fsl_ifc_nand.c | 6 +----- include/linux/fsl_ifc.h | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index fae01b04abc7..5a9c2f0020c2 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -227,11 +227,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int sector_end = sector_start + chip->ecc.steps - 1; __be32 *eccstat_regs; - if (ctrl->version >= FSL_IFC_VERSION_2_0_0) - eccstat_regs = ifc->ifc_nand.v2_nand_eccstat; - else - eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; - + eccstat_regs = ifc->ifc_nand.nand_eccstat; eccstat = ifc_in32(&eccstat_regs[sector_start / 4]); for (i = sector_start; i <= sector_end; i++) { diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index c332f0a45607..3fdfede2f0f3 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -734,11 +734,7 @@ struct fsl_ifc_nand { u32 res19[0x10]; __be32 nand_fsr; u32 res20; - /* The V1 nand_eccstat is actually 4 words that overlaps the - * V2 nand_eccstat. - */ - __be32 v1_nand_eccstat[2]; - __be32 v2_nand_eccstat[6]; + __be32 nand_eccstat[8]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; -- cgit v1.2.3 From a8d7bde23e7130686b76624b099f3e22dd38aef7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 21 Mar 2018 10:06:13 +0100 Subject: ALSA: hda - Force polling mode on CFL for fixing codec communication We've observed too long probe time with Coffee Lake (CFL) machines, and the likely cause is some communication problem between the HD-audio controller and the codec chips. While the controller expects an IRQ wakeup for each codec response, it seems sometimes missing, and it takes one second for the controller driver to time out and read the response in the polling mode. Although we aren't sure about the real culprit yet, in this patch, we put a workaround by forcing the polling mode as default for CFL machines; the polling mode itself isn't too heavy, and much better than other workarounds initially suggested (e.g. disabling power-save), at least. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199007 Fixes: e79b0006c45c ("ALSA: hda - Add Coffelake PCI ID") Reported-and-tested-by: Hui Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d5017adf9feb..c507c69029e3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -375,6 +375,7 @@ enum { ((pci)->device == 0x160c)) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -1744,6 +1745,10 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, else chip->bdl_pos_adj = bdl_pos_adj[dev]; + /* Workaround for a communication error on CFL (bko#199007) */ + if (IS_CFL(pci)) + chip->polling_mode = 1; + err = azx_bus_init(chip, model[dev], &pci_hda_io_ops); if (err < 0) { kfree(hda); -- cgit v1.2.3 From b24791fe00f8b089d5b10cb7bcc4e1ae88b4831b Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Tue, 20 Mar 2018 22:58:39 +0000 Subject: drm: Reject getfb for multi-plane framebuffers getfb can only return a single plane, so reject attempts to use it with multi-plane framebuffers. Signed-off-by: Daniel Stone Reported-by: Daniel van Vugt Reviewed-by: Rob Clark Reviewed-by: Daniel Vetter Fixes: 308e5bcbdb10 ("drm: add an fb creation ioctl that takes a pixel format v5") Cc: stable@vger.kernel.org # v3.3+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105518 Link: https://patchwork.freedesktop.org/patch/msgid/20180320225839.30905-1-daniels@collabora.com --- drivers/gpu/drm/drm_framebuffer.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index c0530a1af5e3..2dc5e8bed172 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -461,6 +461,12 @@ int drm_mode_getfb(struct drm_device *dev, if (!fb) return -ENOENT; + /* Multi-planar framebuffers need getfb2. */ + if (fb->format->num_planes > 1) { + ret = -EINVAL; + goto out; + } + r->height = fb->height; r->width = fb->width; r->depth = fb->format->depth; @@ -484,6 +490,7 @@ int drm_mode_getfb(struct drm_device *dev, ret = -ENODEV; } +out: drm_framebuffer_put(fb); return ret; -- cgit v1.2.3 From 140bcaa23a1c37b694910424075a15e009120dbe Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 8 Mar 2018 10:07:37 +0100 Subject: drm/vmwgfx: Fix black screen and device errors when running without fbdev When we are running without fbdev, transitioning from the login screen to X or gnome-shell/wayland will cause a vt switch and the driver will disable svga mode, losing all modesetting resources. However, the kms atomic state does not reflect that and may think that a crtc is still turned on, which will cause device errors when we try to bind an fb to the crtc, and the screen will remain black. Fix this by turning off all kms resources before disabling svga mode. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 13 +++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 11 +++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 1 - 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 184340d486c3..86d25f18aa99 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1337,6 +1337,19 @@ static void __vmw_svga_disable(struct vmw_private *dev_priv) */ void vmw_svga_disable(struct vmw_private *dev_priv) { + /* + * Disabling SVGA will turn off device modesetting capabilities, so + * notify KMS about that so that it doesn't cache atomic state that + * isn't valid anymore, for example crtcs turned on. + * Strictly we'd want to do this under the SVGA lock (or an SVGA mutex), + * but vmw_kms_lost_device() takes the reservation sem and thus we'll + * end up with lock order reversal. Thus, a master may actually perform + * a new modeset just after we call vmw_kms_lost_device() and race with + * vmw_svga_disable(), but that should at worst cause atomic KMS state + * to be inconsistent with the device, causing modesetting problems. + * + */ + vmw_kms_lost_device(dev_priv->dev); ttm_write_lock(&dev_priv->reservation_sem, false); spin_lock(&dev_priv->svga_lock); if (dev_priv->bdev.man[TTM_PL_VRAM].use_type) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d08753e8fd94..9116fe8baebc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -938,6 +938,7 @@ int vmw_kms_present(struct vmw_private *dev_priv, int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv); +void vmw_kms_lost_device(struct drm_device *dev); int vmw_dumb_create(struct drm_file *file_priv, struct drm_device *dev, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index ead61015cd79..b3d62aa01a9b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2851,3 +2851,14 @@ int vmw_kms_set_config(struct drm_mode_set *set, return drm_atomic_helper_set_config(set, ctx); } + + +/** + * vmw_kms_lost_device - Notify kms that modesetting capabilities will be lost + * + * @dev: Pointer to the drm device + */ +void vmw_kms_lost_device(struct drm_device *dev) +{ + drm_atomic_helper_shutdown(dev); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index cd9da2dd79af..948362c43c73 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -439,5 +439,4 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, int vmw_kms_set_config(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx); - #endif -- cgit v1.2.3 From 73a88250b70954a8f27c2444e1c2411bba3c29d9 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 21 Mar 2018 10:18:38 +0100 Subject: drm/vmwgfx: Fix a destoy-while-held mutex problem. When validating legacy surfaces, the backup bo might be destroyed at surface validate time. However, the kms resource validation code may have the bo reserved, so we will destroy a locked mutex. While there shouldn't be any other users of that mutex when it is destroyed, it causes a lock leak and thus throws a lockdep error. Fix this by having the kms resource validation code hold a reference to the bo while we have it reserved. We do this by introducing a validation context which might come in handy when the kms code is extended to validate multiple resources or buffers. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 28 +++++++++++++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 12 +++++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 5 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 5 +++-- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index b3d62aa01a9b..3c824fd7cbf3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -31,7 +31,6 @@ #include #include - /* Might need a hrtimer here? */ #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1) @@ -2517,9 +2516,12 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, * Helper to be used if an error forces the caller to undo the actions of * vmw_kms_helper_resource_prepare. */ -void vmw_kms_helper_resource_revert(struct vmw_resource *res) +void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx) { - vmw_kms_helper_buffer_revert(res->backup); + struct vmw_resource *res = ctx->res; + + vmw_kms_helper_buffer_revert(ctx->buf); + vmw_dmabuf_unreference(&ctx->buf); vmw_resource_unreserve(res, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -2536,10 +2538,14 @@ void vmw_kms_helper_resource_revert(struct vmw_resource *res) * interrupted by a signal. */ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, - bool interruptible) + bool interruptible, + struct vmw_validation_ctx *ctx) { int ret = 0; + ctx->buf = NULL; + ctx->res = res; + if (interruptible) ret = mutex_lock_interruptible(&res->dev_priv->cmdbuf_mutex); else @@ -2558,6 +2564,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, res->dev_priv->has_mob); if (ret) goto out_unreserve; + + ctx->buf = vmw_dmabuf_reference(res->backup); } ret = vmw_resource_validate(res); if (ret) @@ -2565,7 +2573,7 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, return 0; out_revert: - vmw_kms_helper_buffer_revert(res->backup); + vmw_kms_helper_buffer_revert(ctx->buf); out_unreserve: vmw_resource_unreserve(res, false, NULL, 0); out_unlock: @@ -2581,11 +2589,13 @@ out_unlock: * @out_fence: Optional pointer to a fence pointer. If non-NULL, a * ref-counted fence pointer is returned here. */ -void vmw_kms_helper_resource_finish(struct vmw_resource *res, - struct vmw_fence_obj **out_fence) +void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, + struct vmw_fence_obj **out_fence) { - if (res->backup || out_fence) - vmw_kms_helper_buffer_finish(res->dev_priv, NULL, res->backup, + struct vmw_resource *res = ctx->res; + + if (ctx->buf || out_fence) + vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf, out_fence, NULL); vmw_resource_unreserve(res, false, NULL, 0); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 948362c43c73..3d2ca280eaa7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -240,6 +240,11 @@ struct vmw_display_unit { int set_gui_y; }; +struct vmw_validation_ctx { + struct vmw_resource *res; + struct vmw_dma_buffer *buf; +}; + #define vmw_crtc_to_du(x) \ container_of(x, struct vmw_display_unit, crtc) #define vmw_connector_to_du(x) \ @@ -296,9 +301,10 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, struct drm_vmw_fence_rep __user * user_fence_rep); int vmw_kms_helper_resource_prepare(struct vmw_resource *res, - bool interruptible); -void vmw_kms_helper_resource_revert(struct vmw_resource *res); -void vmw_kms_helper_resource_finish(struct vmw_resource *res, + bool interruptible, + struct vmw_validation_ctx *ctx); +void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx); +void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, struct vmw_fence_obj **out_fence); int vmw_kms_readback(struct vmw_private *dev_priv, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 63a4cd794b73..3ec9eae831b8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -909,12 +909,13 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); struct vmw_kms_sou_surface_dirty sdirty; + struct vmw_validation_ctx ctx; int ret; if (!srf) srf = &vfbs->surface->res; - ret = vmw_kms_helper_resource_prepare(srf, true); + ret = vmw_kms_helper_resource_prepare(srf, true, &ctx); if (ret) return ret; @@ -933,7 +934,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); - vmw_kms_helper_resource_finish(srf, out_fence); + vmw_kms_helper_resource_finish(&ctx, out_fence); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index b68d74888ab1..6b969e5dea2a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -980,12 +980,13 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); struct vmw_stdu_dirty sdirty; + struct vmw_validation_ctx ctx; int ret; if (!srf) srf = &vfbs->surface->res; - ret = vmw_kms_helper_resource_prepare(srf, true); + ret = vmw_kms_helper_resource_prepare(srf, true, &ctx); if (ret) return ret; @@ -1008,7 +1009,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, dest_x, dest_y, num_clips, inc, &sdirty.base); out_finish: - vmw_kms_helper_resource_finish(srf, out_fence); + vmw_kms_helper_resource_finish(&ctx, out_fence); return ret; } -- cgit v1.2.3 From 7c181f4fcdc62e5dc7a87fd33387d322262c3b52 Mon Sep 17 00:00:00 2001 From: Ben Caradoc-Davies Date: Mon, 19 Mar 2018 12:57:44 +1300 Subject: mac80211: add ieee80211_hw flag for QoS NDP support Commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") added an argument qos_ok to ieee80211_nullfunc_get to support QoS NDP. Despite the claim in the commit log "Change all the drivers to *not* allow QoS NDP for now, even though it looks like most of them should be OK with that", this commit enables QoS NDP in response to beacons (see change to mlme.c:ieee80211_send_nullfunc), causing ath9k_htc to lose IP connectivity. See: https://patchwork.kernel.org/patch/10241109/ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=891060 Introduce a hardware flag to allow such buggy drivers to override the correct default behaviour of mac80211 of sending QoS NDP packets. Signed-off-by: Ben Caradoc-Davies Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/debugfs.c | 1 + net/mac80211/mlme.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c96511fa9198..2b581bd93812 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2063,6 +2063,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * + * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't + * support QoS NDP for AP probing - that's most likely a driver bug. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2106,6 +2109,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, + IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1f466d12a6bc..94c7ee9df33b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -212,6 +212,7 @@ static const char *hw_flag_names[] = { FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), + FLAG(DOESNT_SUPPORT_QOS_NDP), #undef FLAG }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 39b660b9a908..5f303abac5ad 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -896,7 +896,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, + !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); if (!skb) return; -- cgit v1.2.3 From d58ac803cfbb92ede501409dd6afe9abb111d4f1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Mar 2018 11:49:40 +0200 Subject: mmc: sdhci-acpi: Fix IRQ 0 Zero is a valid IRQ number and is being used on some CHT tablets. Stop treating it as an error. Reported-by: Luke Ross Fixes: 1b7ba57ecc86 ("mmc: sdhci-acpi: Handle return value of platform_get_irq") Cc: Arvind Yadav Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 4065da58789d..32321bd596d8 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -680,7 +680,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev) host->hw_name = "ACPI"; host->ops = &sdhci_acpi_ops_dflt; host->irq = platform_get_irq(pdev, 0); - if (host->irq <= 0) { + if (host->irq < 0) { err = -EINVAL; goto err_free; } -- cgit v1.2.3 From 4ea5aca27eab1c495985eb2a30b65b78cbf99404 Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Wed, 21 Mar 2018 08:05:18 +0100 Subject: mac80211_hwsim: Set wmediumd for new radios Set the wmediumd to the net's wmediumd when the radio gets created. Radios created after HWSIM_CMD_REGISTER don't currently get their data->wmediumd set and the userspace would need to reconnect to netlink to be able to call HWSIM_CMD_REGISTER again. Alternatively I think data->netgroup and data->wmedium could be replaced with a pointer to hwsim_net. Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6e0af815f25e..35b21f8152bb 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2727,6 +2727,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, mutex_init(&data->mutex); data->netgroup = hwsim_net_get_netgroup(net); + data->wmediumd = hwsim_net_get_wmediumd(net); /* Enable frame retransmissions for lossy channels */ hw->max_rates = 4; -- cgit v1.2.3 From 60b01bcce97191f473fa869df2713143936d6ef4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Mar 2018 11:00:14 +0100 Subject: ath9k_htc: use non-QoS NDP for AP probing When switching mac80211 to use QoS NDP, it turned out that ath9k_htc is somehow broken by this, e.g. see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=891060. Fix this by using the new mac80211 flag to go back to the old, incorrect, behaviour for this driver. Fixes: 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") Reported-by: Ben Caradoc-Davies Acked-by: Kalle Valo Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index e89e5ef2c2a4..f246e9ed4a81 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -729,6 +729,7 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv, ieee80211_hw_set(hw, SPECTRUM_MGMT); ieee80211_hw_set(hw, SIGNAL_DBM); ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, DOESNT_SUPPORT_QOS_NDP); if (ath9k_ps_enable) ieee80211_hw_set(hw, SUPPORTS_PS); -- cgit v1.2.3 From 3a088dd1b72dc0fc2e7ee1fca76e26290c5261a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Mar 2018 10:04:48 +0000 Subject: drm/i915: Specify which engines to reset following semaphore/event lockups If the GPU is stuck waiting for an event or for a semaphore, we need to reset the GPU in order to recover. We have to tell the reset routine which engines we want reset, but we were still using the old interface and declaring it as "not-fatal". Fixes: 14b730fcb8d9 ("drm/i915/tdr: Prepare error handler to accept mask of hung engines") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180320100449.1360-1-chris@chris-wilson.co.uk (cherry picked from commit ca98317b89428e6ac17be0938b467ed78654dd56) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_hangcheck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 348a4f7ffb67..53747318f4a7 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, 0, + i915_handle_error(dev_priv, BIT(engine->id), "Kicking stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); @@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) default: return ENGINE_DEAD; case 1: - i915_handle_error(dev_priv, 0, + i915_handle_error(dev_priv, ALL_ENGINES, "Kicking stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); -- cgit v1.2.3 From eb85a355c3afd9379f5953cfe2df73632d14c884 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 21 Mar 2018 08:16:29 -0700 Subject: ARM: OMAP: Fix SRAM W+X mapping We are still using custom SRAM code for some SoCs and are not marking the PM code mapped to SRAM as read-only and executable after we're done. With CONFIG_DEBUG_WX=y, we will get "Found insecure W+X mapping at address" warning. Let's fix this issue the same way as commit 728bbe75c82f ("misc: sram: Introduce support code for protect-exec sram type") is doing for drivers/misc/sram-exec.c. On omap3, we need to restore SRAM when returning from off mode after idle, so init time configuration is not enough. And as we no longer have users for omap_sram_push_address() we can make it static while at it. Note that eventually we should be using sram-exec.c for all SoCs. Cc: stable@vger.kernel.org # v4.12+ Cc: Dave Gerlach Reported-by: Pavel Machek Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/plat/sram.h | 11 +---------- arch/arm/plat-omap/sram.c | 36 +++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/arm/plat-omap/include/plat/sram.h b/arch/arm/plat-omap/include/plat/sram.h index fb061cf0d736..30a07730807a 100644 --- a/arch/arm/plat-omap/include/plat/sram.h +++ b/arch/arm/plat-omap/include/plat/sram.h @@ -5,13 +5,4 @@ void omap_map_sram(unsigned long start, unsigned long size, unsigned long skip, int cached); void omap_sram_reset(void); -extern void *omap_sram_push_address(unsigned long size); - -/* Macro to push a function to the internal SRAM, using the fncpy API */ -#define omap_sram_push(funcp, size) ({ \ - typeof(&(funcp)) _res = NULL; \ - void *_sram_address = omap_sram_push_address(size); \ - if (_sram_address) \ - _res = fncpy(_sram_address, &(funcp), size); \ - _res; \ -}) +extern void *omap_sram_push(void *funcp, unsigned long size); diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index a5bc92d7e476..921840acf65c 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -42,7 +43,7 @@ static void __iomem *omap_sram_ceil; * Note that fncpy requires the returned address to be aligned * to an 8-byte boundary. */ -void *omap_sram_push_address(unsigned long size) +static void *omap_sram_push_address(unsigned long size) { unsigned long available, new_ceil = (unsigned long)omap_sram_ceil; @@ -60,6 +61,30 @@ void *omap_sram_push_address(unsigned long size) return (void *)omap_sram_ceil; } +void *omap_sram_push(void *funcp, unsigned long size) +{ + void *sram; + unsigned long base; + int pages; + void *dst = NULL; + + sram = omap_sram_push_address(size); + if (!sram) + return NULL; + + base = (unsigned long)sram & PAGE_MASK; + pages = PAGE_ALIGN(size) / PAGE_SIZE; + + set_memory_rw(base, pages); + + dst = fncpy(sram, funcp, size); + + set_memory_ro(base, pages); + set_memory_x(base, pages); + + return dst; +} + /* * The SRAM context is lost during off-idle and stack * needs to be reset. @@ -75,6 +100,9 @@ void omap_sram_reset(void) void __init omap_map_sram(unsigned long start, unsigned long size, unsigned long skip, int cached) { + unsigned long base; + int pages; + if (size == 0) return; @@ -95,4 +123,10 @@ void __init omap_map_sram(unsigned long start, unsigned long size, */ memset_io(omap_sram_base + omap_sram_skip, 0, omap_sram_size - omap_sram_skip); + + base = (unsigned long)omap_sram_base; + pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; + + set_memory_ro(base, pages); + set_memory_x(base, pages); } -- cgit v1.2.3 From 32927e281c004c352c0b88d2975f5511e5b7cf8e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 20 Mar 2018 15:45:37 +0200 Subject: IB/mlx5: Don't clean uninitialized UMR resources In case we failed to create UMR resources, mark them as invalid so we won't try to destroy them on the unwind path. Add the relevant checks to destroy_umrc_res(), this is done for the unlikely event ib_register_device() or create_umr_res() err out and we try to destroy invalid objects. Fixes: 42cea83f9524 ("IB/mlx5: Fix cleanup order on unload") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 12 +++++++++--- drivers/infiniband/hw/mlx5/mr.c | 3 +++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index da091de4e69d..7f8bda3a2005 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3448,9 +3448,12 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - mlx5_ib_destroy_qp(dev->umrc.qp); - ib_free_cq(dev->umrc.cq); - ib_dealloc_pd(dev->umrc.pd); + if (dev->umrc.qp) + mlx5_ib_destroy_qp(dev->umrc.qp); + if (dev->umrc.cq) + ib_free_cq(dev->umrc.cq); + if (dev->umrc.pd) + ib_dealloc_pd(dev->umrc.pd); } enum { @@ -3552,12 +3555,15 @@ static int create_umr_res(struct mlx5_ib_dev *dev) error_4: mlx5_ib_destroy_qp(qp); + dev->umrc.qp = NULL; error_3: ib_free_cq(cq); + dev->umrc.cq = NULL; error_2: ib_dealloc_pd(pd); + dev->umrc.pd = NULL; error_0: kfree(attr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index c51c602f06d6..3e0b3f0238d6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -739,6 +739,9 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; + if (!dev->cache.wq) + return 0; + dev->cache.stopped = 1; flush_workqueue(dev->cache.wq); -- cgit v1.2.3 From 103140eccbd2e3d56bcd366eba3e657f5d358bf5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 21 Mar 2018 09:00:29 +0200 Subject: RDMA/restrack: Move restrack_clean to be symmetrical to restrack_init The fact that resource tracking commit 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") was added immediately after commit 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") caused us to miss the fact that PD and CQ are created after ib_register_device, but released after ib_unregister_device() and not before as it is expected from normal flow. Fix introduced in commit 42cea83f9524 ("IB/mlx5: Fix cleanup order on unload") revealed this fact, so this patch is needed to avoid from restrack warnings It fixes resource tracking warnings during shutdown. [ 43.473906] CPU: 5 PID: 3016 Comm: modprobe Not tainted 4.16.0-rc5-for-linust-perf-2018-03-19_07-01-58-14 #1 [ 43.473907] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 43.473919] RIP: 0010:rdma_restrack_clean+0x25/0x30 [ib_core] [ 43.473921] RSP: 0018:ffffc9000267be48 EFLAGS: 00010282 [ 43.473924] RAX: 0000000000000000 RBX: ffff88033c690070 RCX: 0000000180080006 [ 43.473925] RDX: ffff88035ce922e0 RSI: ffffea000cf1a200 RDI: ffff88033c6907c8 [ 43.473926] RBP: ffff88033c690070 R08: ffff88033c689000 R09: 0000000180080006 [ 43.473927] R10: 000000003c68a001 R11: ffff88033c689000 R12: ffff88033c690000 [ 43.473929] R13: ffff88033c69005c R14: 0000000000000000 R15: 0000000000000000 [ 43.473932] FS: 00007f5928359740(0000) GS:ffff88036c540000(0000) knlGS:0000000000000000 [ 43.473933] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 43.473935] CR2: 00007ffffc760cc8 CR3: 000000035620c000 CR4: 00000000000006e0 [ 43.473940] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 43.473941] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 43.473942] Call Trace: [ 43.473969] ib_unregister_device+0xf5/0x190 [ib_core] [ 43.474000] __mlx5_ib_remove+0x2e/0x40 [mlx5_ib] [ 43.474098] mlx5_remove_device+0xf5/0x120 [mlx5_core] [ 43.474132] mlx5_unregister_interface+0x37/0x90 [mlx5_core] [ 43.474142] mlx5_ib_cleanup+0xc/0x16a [mlx5_ib] [ 43.474152] SyS_delete_module+0x159/0x260 [ 43.474159] do_syscall_64+0x61/0x110 [ 43.474165] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 43.474168] RIP: 0033:0x7f59278466b7 [ 43.474170] RSP: 002b:00007ffffc763e38 EFLAGS: 00000202 ORIG_RAX: 00000000000000b0 [ 43.474172] RAX: ffffffffffffffda RBX: 000000000130d590 RCX: 00007f59278466b7 [ 43.474173] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000000000130d5f8 [ 43.474175] RBP: 0000000000000000 R08: 00007f5927b0b060 R09: 00007f59278b6a40 [ 43.474176] R10: 00007ffffc763bc0 R11: 0000000000000202 R12: 0000000000000000 [ 43.474177] R13: 0000000000000001 R14: 000000000130d5f8 R15: 0000000000000000 [ 43.474179] Code: 84 00 00 00 00 00 0f 1f 44 00 00 48 83 c7 28 31 c0 eb 0c 48 83 c0 08 48 3d 00 08 00 00 74 0f 48 8d 14 07 48 8b 12 48 85 d2 74 e8 <0f> 0b c3 f3 c3 66 0f 1f 44 00 00 0f 1f 44 00 00 53 48 8b 47 28 [ 43.474221] ---[ end trace e89771e2250ffc23 ]--- Fixes: 42cea83f9524 ("IB/mlx5: Fix cleanup order on unload") Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index bb065c9449be..b7459cf524e4 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -290,6 +290,7 @@ void ib_dealloc_device(struct ib_device *device) { WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); + rdma_restrack_clean(&device->res); put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); @@ -600,8 +601,6 @@ void ib_unregister_device(struct ib_device *device) } up_read(&lists_rwsem); - rdma_restrack_clean(&device->res); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); -- cgit v1.2.3 From 5f3e3b85cc0a5eae1c46d72e47d3de7bf208d9e2 Mon Sep 17 00:00:00 2001 From: Chien Tin Tung Date: Wed, 21 Mar 2018 13:09:25 -0500 Subject: RDMA/ucma: Correct option size check using optlen The option size check is using optval instead of optlen causing the set option call to fail. Use the correct field, optlen, for size check. Fixes: 6a21dfc0d0db ("RDMA/ucma: Limit possible option size") Signed-off-by: Chien Tin Tung Signed-off-by: Shiraz Saleem Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index e5a1e7d81326..60449d611fb4 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1307,7 +1307,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (unlikely(cmd.optval > KMALLOC_MAX_SIZE)) + if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) return -EINVAL; optval = memdup_user((void __user *) (unsigned long) cmd.optval, -- cgit v1.2.3 From c3594f22302cca5e924e47ec1cc8edd265708f41 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 21 Mar 2018 14:51:50 +0200 Subject: RDMA/qedr: fix QP's ack timeout configuration QPs that were configured with ack timeout value lower than 1 msec will not implement re-transmission timeout. This means that if a packet / ACK were dropped, the QP will not retransmit this packet. This can lead to an application hang. Fixes: cecbcddf6 ("qedr: Add support for QP verbs") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 875b17272d65..716fae063122 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2086,18 +2086,23 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, SET_FIELD(qp_params.modify_flags, QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); - qp_params.ack_timeout = attr->timeout; - if (attr->timeout) { - u32 temp; - - temp = 4096 * (1UL << attr->timeout) / 1000 / 1000; - /* FW requires [msec] */ - qp_params.ack_timeout = temp; - } else { - /* Infinite */ + /* The received timeout value is an exponent used like this: + * "12.7.34 LOCAL ACK TIMEOUT + * Value representing the transport (ACK) timeout for use by + * the remote, expressed as: 4.096 * 2^timeout [usec]" + * The FW expects timeout in msec so we need to divide the usec + * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2, + * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8). + * The value of zero means infinite so we use a 'max_t' to make + * sure that sub 1 msec values will be configured as 1 msec. + */ + if (attr->timeout) + qp_params.ack_timeout = + 1 << max_t(int, attr->timeout - 8, 0); + else qp_params.ack_timeout = 0; - } } + if (attr_mask & IB_QP_RETRY_CNT) { SET_FIELD(qp_params.modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); -- cgit v1.2.3 From b15606f47b89b0b09936d7f45b59ba6275527041 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 21 Mar 2018 14:51:51 +0200 Subject: RDMA/qedr: Fix rc initialization on CNQ allocation failure Return code wasn't set properly when CNQ allocation failed. This only affect error message logging, currently user will receive an error message that says the qedr driver load failed with rc '0', instead of ENOMEM Fixes: ec72fce4 ("qedr: Add support for RoCE HW init") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index db4bf97c0e15..0ffb9b93e22d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -833,7 +833,8 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev); if (!dev->num_cnq) { - DP_ERR(dev, "not enough CNQ resources.\n"); + DP_ERR(dev, "Failed. At least one CNQ is required.\n"); + rc = -ENOMEM; goto init_err; } -- cgit v1.2.3 From caf61b1b8b88ccf1451f7321a176393797e8d292 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 21 Mar 2018 14:51:52 +0200 Subject: RDMA/qedr: Fix QP state initialization race Once the FW is transitioned to error, FLUSH cqes can be received. We want the driver to be aware of the fact that QP is already in error. Without this fix, a user may see false error messages in the dmesg log, mentioning that a FLUSH cqe was received while QP is not in error state. Fixes: cecbcddf ("qedr: Add support for QP verbs") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 716fae063122..419a158e8fca 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1841,14 +1841,15 @@ static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph) static int qedr_update_qp_state(struct qedr_dev *dev, struct qedr_qp *qp, + enum qed_roce_qp_state cur_state, enum qed_roce_qp_state new_state) { int status = 0; - if (new_state == qp->state) + if (new_state == cur_state) return 0; - switch (qp->state) { + switch (cur_state) { case QED_ROCE_QP_STATE_RESET: switch (new_state) { case QED_ROCE_QP_STATE_INIT: @@ -1955,6 +1956,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev); const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); enum ib_qp_state old_qp_state, new_qp_state; + enum qed_roce_qp_state cur_state; int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, @@ -2175,13 +2177,25 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->dest_qp_num = attr->dest_qp_num; } + cur_state = qp->state; + + /* Update the QP state before the actual ramrod to prevent a race with + * fast path. Modifying the QP state to error will cause the device to + * flush the CQEs and while polling the flushed CQEs will considered as + * a potential issue if the QP isn't in error state. + */ + if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI && + !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR) + qp->state = QED_ROCE_QP_STATE_ERR; + if (qp->qp_type != IB_QPT_GSI) rc = dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); if (attr_mask & IB_QP_STATE) { if ((qp->qp_type != IB_QPT_GSI) && (!udata)) - rc = qedr_update_qp_state(dev, qp, qp_params.new_state); + rc = qedr_update_qp_state(dev, qp, cur_state, + qp_params.new_state); qp->state = qp_params.new_state; } -- cgit v1.2.3 From 896196dc4e419a9d0782404e0befac17d638fc01 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Mar 2018 14:06:23 -0700 Subject: libnvdimm, region: hide persistence_domain when unknown Similar to other region attributes, do not emit the persistence_domain attribute if its contents are empty. Fixes: 96c3a239054a ("libnvdimm: expose platform persistence attr...") Cc: Dave Jiang Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e6d01911e092..a8e9d428c0a5 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -593,6 +593,13 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) return 0; } + if (a == &dev_attr_persistence_domain.attr) { + if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE) + | BIT(ND_REGION_PERSIST_MEMCTRL))) == 0) + return 0; + return a->mode; + } + if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) return a->mode; -- cgit v1.2.3 From 214cbc14734958fe533916fdb4194f5983ad4bc4 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Fri, 16 Mar 2018 21:27:28 +0100 Subject: MIPS: lantiq: Fix Danube USB clock On Danube the USB0 controller registers are at 1e101000 and the USB0 PHY register is at 1f203018 similar to all other lantiq SoCs. Activate the USB controller gating clock thorough the USB controller driver and not the PHY. This fixes a problem introduced in a previous commit. Fixes: dea54fbad332 ("phy: Add an USB PHY driver for the Lantiq SoCs using the RCU module") Signed-off-by: Mathias Kresin Signed-off-by: Hauke Mehrtens Acked-by: Martin Blumenstingl Cc: Ralf Baechle Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: # 4.14+ Patchwork: https://patchwork.linux-mips.org/patch/18816/ Signed-off-by: James Hogan --- arch/mips/lantiq/xway/sysctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 52500d3b7004..f11f1dd10493 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -560,7 +560,7 @@ void __init ltq_soc_init(void) } else { clkdev_add_static(ltq_danube_cpu_hz(), ltq_danube_fpi_hz(), ltq_danube_fpi_hz(), ltq_danube_pp32_hz()); - clkdev_add_pmu("1f203018.usb2-phy", "ctrl", 1, 0, PMU_USB0); + clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); -- cgit v1.2.3 From 3223a5a7d3a606dcb7d9190a788b9544a45441ee Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Fri, 16 Mar 2018 21:27:29 +0100 Subject: MIPS: lantiq: Enable AHB Bus for USB On Danube and AR9 the USB core is connected though a AHB bus to the main system cross bar, hence we need to enable the gating clock of the AHB Bus as well to make the USB controller work. Fixes: dea54fbad332 ("phy: Add an USB PHY driver for the Lantiq SoCs using the RCU module") Signed-off-by: Mathias Kresin Signed-off-by: Hauke Mehrtens Acked-by: Martin Blumenstingl Cc: Ralf Baechle Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: # 4.14+ Patchwork: https://patchwork.linux-mips.org/patch/18814/ Signed-off-by: James Hogan --- arch/mips/lantiq/xway/sysctrl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index f11f1dd10493..e0af39b33e28 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -549,9 +549,9 @@ void __init ltq_soc_init(void) clkdev_add_static(ltq_ar9_cpu_hz(), ltq_ar9_fpi_hz(), ltq_ar9_fpi_hz(), CLOCK_250M); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); - clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); + clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); - clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1); + clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM); clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); @@ -560,7 +560,7 @@ void __init ltq_soc_init(void) } else { clkdev_add_static(ltq_danube_cpu_hz(), ltq_danube_fpi_hz(), ltq_danube_fpi_hz(), ltq_danube_pp32_hz()); - clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); + clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); -- cgit v1.2.3 From a821328c2f3003b908880792d71b2781b44fa53c Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Fri, 16 Mar 2018 21:27:30 +0100 Subject: MIPS: lantiq: ase: Enable MFD_SYSCON Enable syscon to use it for the RCU MFD on Amazon SE as well. The Amazon SE also has similar reset controller system as Danube and XWAY and use their drivers mostly. As these drivers now need syscon also activate the syscon subsystem for for Amazon SE. Fixes: 2b6639d4c794 ("MIPS: lantiq: Enable MFD_SYSCON to be able to use it for the RCU MFD") Signed-off-by: Mathias Kresin Signed-off-by: Hauke Mehrtens Acked-by: Martin Blumenstingl Cc: Ralf Baechle Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: # 4.14+ Patchwork: https://patchwork.linux-mips.org/patch/18817/ Signed-off-by: James Hogan --- arch/mips/lantiq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig index 692ae85a3e3d..8e3a1fc2bc39 100644 --- a/arch/mips/lantiq/Kconfig +++ b/arch/mips/lantiq/Kconfig @@ -13,6 +13,8 @@ choice config SOC_AMAZON_SE bool "Amazon SE" select SOC_TYPE_XWAY + select MFD_SYSCON + select MFD_CORE config SOC_XWAY bool "XWAY" -- cgit v1.2.3 From 924613d3a87feaa886dd1b18496463c5359e7965 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Mar 2018 14:32:59 +0000 Subject: bnx2x: fix spelling mistake: "registeration" -> "registration" Trivial fix to spelling mistake in BNX2X_ERR error message text Signed-off-by: Colin Ian King Acked-by: Sudarsana Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 74fc9af4aadb..b8388e93520a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13913,7 +13913,7 @@ static void bnx2x_register_phc(struct bnx2x *bp) bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &bp->pdev->dev); if (IS_ERR(bp->ptp_clock)) { bp->ptp_clock = NULL; - BNX2X_ERR("PTP clock registeration failed\n"); + BNX2X_ERR("PTP clock registration failed\n"); } } -- cgit v1.2.3 From 3f2176dd7fe9e4eb1444766741fe4ea6535eb6d8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Mar 2018 14:57:11 +0000 Subject: qede: fix spelling mistake: "registeration" -> "registration" Trivial fix to spelling mistakes in DP_ERR error message text and comments Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5c28209e97d0..a01e7d6e5442 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -288,7 +288,7 @@ int __init qede_init(void) } /* Must register notifier before pci ops, since we might miss - * interface rename after pci probe and netdev registeration. + * interface rename after pci probe and netdev registration. */ ret = register_netdevice_notifier(&qede_netdev_notifier); if (ret) { @@ -988,7 +988,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, if (rc) goto err3; - /* Prepare the lock prior to the registeration of the netdev, + /* Prepare the lock prior to the registration of the netdev, * as once it's registered we might reach flows requiring it * [it's even possible to reach a flow needing it directly * from there, although it's unlikely]. diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 9b2280badaf7..02adb513f475 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -485,7 +485,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { rc = -EINVAL; - DP_ERR(edev, "PTP clock registeration failed\n"); + DP_ERR(edev, "PTP clock registration failed\n"); goto err2; } -- cgit v1.2.3 From bbc09e7842a5023ba5bc0f8d559b9dd464e44006 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:22 +0100 Subject: net/sched: fix idr leak on the error path of tcf_bpf_init() when the following command sequence is entered # tc action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100 RTNETLINK answers: Invalid argument We have an error talking to the kernel # tc action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel act_bpf correctly refuses to install the first TC rule, because 31 is not a valid instruction. However, it refuses to install the second TC rule, even if the BPF code is correct. Furthermore, it's no more possible to install any other rule having the same value of 'index' until act_bpf module is unloaded/inserted again. After the idr has been reserved, call tcf_idr_release() instead of tcf_idr_cleanup(), to fix this issue. Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b3f2c15affa7..9d2cabf1dc7e 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_idr_cleanup(*act, est); + tcf_idr_release(*act, bind); return ret; } -- cgit v1.2.3 From 60e10b3adc3bac0f6a894c28e0eb1f2d13607362 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:23 +0100 Subject: net/sched: fix idr leak in the error path of tcf_simp_init() if the kernel fails to duplicate 'sdata', creation of a new action fails with -ENOMEM. However, subsequent attempts to install the same action using the same value of 'index' systematically fail with -ENOSPC, and that value of 'index' will no more be usable by act_simple, until rmmod / insmod of act_simple.ko is done: # tc actions add action simple sdata hello index 100 # tc actions list action simple action order 0: Simple index 100 ref 1 bind 0 # tc actions flush action simple # tc actions add action simple sdata hello index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc actions flush action simple # tc actions add action simple sdata hello index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc actions add action simple sdata hello index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of tcf_simp_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Suggested-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 425eac11f6da..b1f38063ada0 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return ret; } d->tcf_action = parm->action; -- cgit v1.2.3 From 5bf7f8185f7c7112decdfe3d3e5c5d5e67f099a1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:24 +0100 Subject: net/sched: fix idr leak in the error path of tcf_act_police_init() tcf_act_police_init() can fail after the idr has been successfully reserved (e.g., qdisc_get_rtab() may return NULL). When this happens, subsequent attempts to configure a police rule using the same idr value systematiclly fail with -ENOSPC: # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: No space left on device ... Fix this in the error path of tcf_act_police_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_police.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 95d3c9097b25..faebf82b99f1 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -194,7 +194,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } -- cgit v1.2.3 From fe9a552e715dfe5167d52deb74ea16335896bdaf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Mar 2018 15:12:07 -0700 Subject: libnvdimm, nfit: fix persistence domain reporting The persistence domain is a point in the platform where once writes reach that destination the platform claims it will make them persistent relative to power loss. In the ACPI NFIT this is currently communicated as 2 bits in the "NFIT - Platform Capabilities Structure". The bits comprise a hierarchy, i.e. bit0 "CPU Cache Flush to NVDIMM Durability on Power Loss Capable" implies bit1 "Memory Controller Flush to NVDIMM Durability on Power Loss Capable". Commit 96c3a239054a "libnvdimm: expose platform persistence attr..." shows the persistence domain as flags, but it's really an enumerated hierarchy. Fix this newly introduced user ABI to show the closest available persistence domain before userspace develops dependencies on seeing, or needing to develop code to tolerate, the raw NFIT flags communicated through the libnvdimm-generic region attribute. Fixes: 96c3a239054a ("libnvdimm: expose platform persistence attr...") Reviewed-by: Dave Jiang Cc: "Rafael J. Wysocki" Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 10 +++++++--- drivers/nvdimm/region_devs.c | 10 ++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index bbe48ad20886..eb09ef55c38a 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2675,10 +2675,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, else ndr_desc->numa_node = NUMA_NO_NODE; - if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) + /* + * Persistence domain bits are hierarchical, if + * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then + * ACPI_NFIT_CAPABILITY_MEM_FLUSH is implied. + */ + if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); - - if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) + else if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags); list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index a8e9d428c0a5..1593e1806b16 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -532,11 +532,13 @@ static ssize_t persistence_domain_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); - unsigned long flags = nd_region->flags; - return sprintf(buf, "%s%s\n", - flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "", - flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : ""); + if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags)) + return sprintf(buf, "cpu_cache\n"); + else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags)) + return sprintf(buf, "memory_controller\n"); + else + return sprintf(buf, "\n"); } static DEVICE_ATTR_RO(persistence_domain); -- cgit v1.2.3 From 94fa3f929ec0c048b1f3658cc335b940df4f6d22 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:25 +0100 Subject: net/sched: fix idr leak in the error path of tcp_pedit_init() tcf_pedit_init() can fail to allocate 'keys' after the idr has been successfully reserved. When this happens, subsequent attempts to configure a pedit rule using the same idr value systematically fail with -ENOSPC: # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of tcf_act_pedit_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 349beaffb29e..fef08835f26d 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); kfree(keys_ex); return -ENOMEM; } -- cgit v1.2.3 From 1e46ef1762bb2e52f0f996131a4d16ed4e9fd065 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:26 +0100 Subject: net/sched: fix idr leak in the error path of __tcf_ipt_init() __tcf_ipt_init() can fail after the idr has been successfully reserved. When this happens, subsequent attempts to configure xt/ipt rules using the same idr value systematically fail with -ENOSPC: # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel Command "(null)" is unknown, try "tc actions help". # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel Command "(null)" is unknown, try "tc actions help". # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of __tcf_ipt_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Since tcf_ipt_release() can now be called when tcfi_t is NULL, we also need to protect calls to ipt_destroy_target() to avoid NULL pointer dereference. Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 06e380ae0928..7e06b9b62613 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t) static void tcf_ipt_release(struct tc_action *a) { struct tcf_ipt *ipt = to_ipt(a); - ipt_destroy_target(ipt->tcfi_t); + + if (ipt->tcfi_t) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_t); + } kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); } static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { @@ -187,7 +190,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } -- cgit v1.2.3 From d7f20015736048f494988093b59fe513dc232ce9 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:27 +0100 Subject: net/sched: fix idr leak in the error path of tcf_vlan_init() tcf_vlan_init() can fail after the idr has been successfully reserved. When this happens, every subsequent attempt to configure vlan rules using the same idr value will systematically fail with -ENOSPC, unless the first attempt was done using the 'replace' keyword. # tc action add action vlan pop index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action vlan pop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action vlan pop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in tcf_vlan_init(), ensuring that tcf_idr_release() is called on the error path when the idr has been reserved, but not yet inserted. Also, don't test 'ovr' in the error path, to avoid a 'replace' failure implicitly become a 'delete' that leaks refcount in act_vlan module: # rmmod act_vlan; modprobe act_vlan # tc action add action vlan push id 5 index 100 # tc action replace action vlan push id 7 index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action list action vlan # # rmmod act_vlan rmmod: ERROR: Module act_vlan is in use Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update") Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c2914e9a4a6f..c49cb61adedf 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } -- cgit v1.2.3 From f29cdfbe33d6915ba8056179b0041279a67e3647 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:28 +0100 Subject: net/sched: fix idr leak in the error path of tcf_skbmod_init() tcf_skbmod_init() can fail after the idr has been successfully reserved. When this happens, every subsequent attempt to configure skbmod rules using the same idr value will systematically fail with -ENOSPC, unless the first attempt was done using the 'replace' keyword: # tc action add action skbmod swap mac index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action skbmod swap mac index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action skbmod swap mac index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in tcf_skbmod_init(), ensuring that tcf_idr_release() is called on the error path when the idr has been reserved, but not yet inserted. Also, don't test 'ovr' in the error path, to avoid a 'replace' failure implicitly become a 'delete' that leaks refcount in act_skbmod module: # rmmod act_skbmod; modprobe act_skbmod # tc action add action skbmod swap mac index 100 # tc action add action skbmod swap mac continue index 100 RTNETLINK answers: File exists We have an error talking to the kernel # tc action replace action skbmod swap mac continue index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action list action skbmod # # rmmod act_skbmod rmmod: ERROR: Module act_skbmod is in use Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d09565d6433e..7b0700f52b50 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } -- cgit v1.2.3 From 891731f6a5dbe508d12443175a7e166a2fba616a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 20 Mar 2018 19:29:51 +1100 Subject: MIPS: ralink: Remove ralink_halt() ralink_halt() does nothing that machine_halt() doesn't already do, so it adds no value. It actually causes incorrect behaviour due to the "unreachable()" at the end. This tells the compiler that the end of the function will never be reached, which isn't true. The compiler responds by not adding a 'return' instruction, so control simply moves on to whatever bytes come afterwards in memory. In my tested, that was the ralink_restart() function. This means that an attempt to 'halt' the machine would actually cause a reboot. So remove ralink_halt() so that a 'halt' really does halt. Fixes: c06e836ada59 ("MIPS: ralink: adds reset code") Signed-off-by: NeilBrown Cc: John Crispin Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 3.9+ Patchwork: https://patchwork.linux-mips.org/patch/18851/ Signed-off-by: James Hogan --- arch/mips/ralink/reset.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c index 64543d66e76b..e9531fea23a2 100644 --- a/arch/mips/ralink/reset.c +++ b/arch/mips/ralink/reset.c @@ -96,16 +96,9 @@ static void ralink_restart(char *command) unreachable(); } -static void ralink_halt(void) -{ - local_irq_disable(); - unreachable(); -} - static int __init mips_reboot_setup(void) { _machine_restart = ralink_restart; - _machine_halt = ralink_halt; return 0; } -- cgit v1.2.3 From a63d706ea719190a79a6c769e898f70680044d3e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 21 Mar 2018 14:02:10 +1100 Subject: MIPS: ralink: Fix booting on MT7621 Since commit 3af5a67c86a3 ("MIPS: Fix early CM probing") the MT7621 has not been able to boot. This commit caused mips_cm_probe() to be called before mt7621.c::proc_soc_init(). prom_soc_init() has a comment explaining that mips_cm_probe() "wipes out the bootloader config" and means that configuration registers are no longer available. It has some code to re-enable this config. Before this re-enable code is run, the sysc register cannot be read, so when SYSC_REG_CHIP_NAME0 is read, a garbage value is returned and panic() is called. If we move the config-repair code to the top of prom_soc_init(), the registers can be read and boot can proceed. Very occasionally, the first register read after the reconfiguration returns garbage, so add a call to __sync(). Fixes: 3af5a67c86a3 ("MIPS: Fix early CM probing") Signed-off-by: NeilBrown Reviewed-by: Matt Redfearn Cc: John Crispin Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.5+ Patchwork: https://patchwork.linux-mips.org/patch/18859/ Signed-off-by: James Hogan --- arch/mips/ralink/mt7621.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index 1b274742077d..d2718de60b9b 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -170,6 +170,28 @@ void prom_soc_init(struct ralink_soc_info *soc_info) u32 n1; u32 rev; + /* Early detection of CMP support */ + mips_cm_probe(); + mips_cpc_probe(); + + if (mips_cps_numiocu(0)) { + /* + * mips_cm_probe() wipes out bootloader + * config for CM regions and we have to configure them + * again. This SoC cannot talk to pamlbus devices + * witout proper iocu region set up. + * + * FIXME: it would be better to do this with values + * from DT, but we need this very early because + * without this we cannot talk to pretty much anything + * including serial. + */ + write_gcr_reg0_base(MT7621_PALMBUS_BASE); + write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | + CM_GCR_REGn_MASK_CMTGT_IOCU0); + __sync(); + } + n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0); n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1); @@ -194,26 +216,6 @@ void prom_soc_init(struct ralink_soc_info *soc_info) rt2880_pinmux_data = mt7621_pinmux_data; - /* Early detection of CMP support */ - mips_cm_probe(); - mips_cpc_probe(); - - if (mips_cps_numiocu(0)) { - /* - * mips_cm_probe() wipes out bootloader - * config for CM regions and we have to configure them - * again. This SoC cannot talk to pamlbus devices - * witout proper iocu region set up. - * - * FIXME: it would be better to do this with values - * from DT, but we need this very early because - * without this we cannot talk to pretty much anything - * including serial. - */ - write_gcr_reg0_base(MT7621_PALMBUS_BASE); - write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | - CM_GCR_REGn_MASK_CMTGT_IOCU0); - } if (!register_cps_smp_ops()) return; -- cgit v1.2.3 From 834814e80268c818f354c8f402e0c6604ed75589 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 21 Mar 2018 22:50:19 -0600 Subject: Revert: "vfio-pci: Mask INTx if a device is not capabable of enabling it" This reverts commit 2170dd04316e0754cbbfa4892a25aead39d225f7 The intent of commit 2170dd04316e ("vfio-pci: Mask INTx if a device is not capabable of enabling it") was to disallow the user from seeing that the device supports INTx if the platform is incapable of enabling it. The detection of this case however incorrectly includes devices which natively do not support INTx, such as SR-IOV VFs, and further discussions reveal gaps even for the target use case. Reported-by: Arjun Vynipadath Fixes: 2170dd04316e ("vfio-pci: Mask INTx if a device is not capabable of enabling it") Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b0f759476900..8a1508a8e481 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -207,9 +207,6 @@ static bool vfio_pci_nointx(struct pci_dev *pdev) } } - if (!pdev->irq) - return true; - return false; } -- cgit v1.2.3 From 3e4543bf20531d1cdb8672d25b3f2ff6d3d07627 Mon Sep 17 00:00:00 2001 From: Pierre-Yves MORDRET Date: Tue, 13 Mar 2018 17:55:35 +0100 Subject: dmaengine: stm32-dmamux: fix a potential buffer overflow The bitfield dma_inuse is allocated of size dma_requests bits, thus a valid bit address is from 0 to (dma_requests - 1). When find_first_zero_bit() fails, it returns dma_requests as invalid address. Using such address for the following set_bit() is incorrect and, if dma_requests is a multiple of BITS_PER_LONG, it will cause a buffer overflow. Currently this driver is only used in DT stm32h743.dtsi where a safe value dma_requests=16 is not triggering the buffer overflow. Fixed by checking the return value of find_first_zero_bit() _before_ using it. Signed-off-by: Antonio Borneo Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Vinod Koul --- drivers/dma/stm32-dmamux.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index 4dbb30cf94ac..b922db90939a 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -118,14 +118,15 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, spin_lock_irqsave(&dmamux->lock, flags); mux->chan_id = find_first_zero_bit(dmamux->dma_inuse, dmamux->dma_requests); - set_bit(mux->chan_id, dmamux->dma_inuse); - spin_unlock_irqrestore(&dmamux->lock, flags); if (mux->chan_id == dmamux->dma_requests) { + spin_unlock_irqrestore(&dmamux->lock, flags); dev_err(&pdev->dev, "Run out of free DMA requests\n"); ret = -ENOMEM; - goto error; + goto error_chan_id; } + set_bit(mux->chan_id, dmamux->dma_inuse); + spin_unlock_irqrestore(&dmamux->lock, flags); /* Look for DMA Master */ for (i = 1, min = 0, max = dmamux->dma_reqs[i]; @@ -173,6 +174,8 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, error: clear_bit(mux->chan_id, dmamux->dma_inuse); + +error_chan_id: kfree(mux); return ERR_PTR(ret); } -- cgit v1.2.3 From 3b82a4db8eaccce735dffd50b4d4e1578099b8e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Mar 2018 16:45:53 +0100 Subject: drm: udl: Properly check framebuffer mmap offsets The memmap options sent to the udl framebuffer driver were not being checked for all sets of possible crazy values. Fix this up by properly bounding the allowed values. Reported-by: Eyal Itkin Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180321154553.GA18454@kroah.com --- drivers/gpu/drm/udl/udl_fb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index b5b335c9b2bb..2ebdc6d5a76e 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -159,10 +159,15 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { unsigned long start = vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start; - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long offset; unsigned long page, pos; - if (offset + size > info->fix.smem_len) + if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) + return -EINVAL; + + offset = vma->vm_pgoff << PAGE_SHIFT; + + if (offset > info->fix.smem_len || size > info->fix.smem_len - offset) return -EINVAL; pos = (unsigned long)info->fix.smem_start + offset; -- cgit v1.2.3 From f0ba9d699e5ca2bcd07f70185d18720c4f1b597c Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 16 Mar 2018 11:46:08 +0800 Subject: ALSA: hda/realtek - Fix Dell headset Mic can't record This platform was hardware fixed type for CTIA type for headset port. Assigned 0x19 verb will fix can't record issue. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5ef056f2aecc..619d1f92b6eb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5501,6 +5501,7 @@ enum { ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, ALC298_FIXUP_TPT470_DOCK, ALC255_FIXUP_DUMMY_LINEOUT_VERB, + ALC255_FIXUP_DELL_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -6361,6 +6362,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC255_FIXUP_DELL_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6415,6 +6423,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), -- cgit v1.2.3 From 88d42b2b45d7208cc872c2c9dec0b1ae6c6008d7 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 14 Mar 2018 16:08:57 +0800 Subject: ALSA: hda/realtek - Fix speaker no sound after system resume It will have a chance speaker no sound after system resume. To toggle NID 0x53 index 0x2 bit 15 will solve this issue. This usage will also suitable with ALC256. Fixes: 4a219ef8f370 ("ALSA: hda/realtek - Add ALC256 HP depop function") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 619d1f92b6eb..aef1f52db7d9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3130,6 +3130,8 @@ static void alc256_init(struct hda_codec *codec) alc_update_coef_idx(codec, 0x46, 3 << 12, 0); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ + alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 1 << 15); /* Clear bit */ + alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 0 << 15); } static void alc256_shutup(struct hda_codec *codec) @@ -7168,6 +7170,8 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0257: spec->codec_variant = ALC269_TYPE_ALC257; + spec->shutup = alc256_shutup; + spec->init_hook = alc256_init; spec->gen.mixer_nid = 0; break; case 0x10ec0215: -- cgit v1.2.3 From 67a01afaf3d34893cf7d2ea19b34555d6abb7cb0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Mar 2018 08:56:06 +0100 Subject: ALSA: aloop: Sync stale timer before release The aloop driver tries to stop the pending timer via timer_del() in the trigger callback and in the close callback. The former is correct, as it's an atomic operation, while the latter expects that the timer gets really removed and proceeds the resource releases after that. But timer_del() doesn't synchronize, hence the running timer may still access the released resources. A similar situation can be also seen in the prepare callback after trigger(STOP) where the prepare tries to re-initialize the things while a timer is still running. The problems like the above are seen indirectly in some syzkaller reports (although it's not 100% clear whether this is the only cause, as the race condition is quite narrow and not always easy to trigger). For addressing these issues, this patch adds the explicit alls of timer_del_sync() in some places, so that the pending timer is properly killed / synced. Cc: Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 0333143a1fa7..0a08e63e9c66 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -192,6 +192,11 @@ static inline void loopback_timer_stop(struct loopback_pcm *dpcm) dpcm->timer.expires = 0; } +static inline void loopback_timer_stop_sync(struct loopback_pcm *dpcm) +{ + del_timer_sync(&dpcm->timer); +} + #define CABLE_VALID_PLAYBACK (1 << SNDRV_PCM_STREAM_PLAYBACK) #define CABLE_VALID_CAPTURE (1 << SNDRV_PCM_STREAM_CAPTURE) #define CABLE_VALID_BOTH (CABLE_VALID_PLAYBACK|CABLE_VALID_CAPTURE) @@ -326,6 +331,8 @@ static int loopback_prepare(struct snd_pcm_substream *substream) struct loopback_cable *cable = dpcm->cable; int bps, salign; + loopback_timer_stop_sync(dpcm); + salign = (snd_pcm_format_width(runtime->format) * runtime->channels) / 8; bps = salign * runtime->rate; @@ -744,7 +751,7 @@ static int loopback_close(struct snd_pcm_substream *substream) struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; - loopback_timer_stop(dpcm); + loopback_timer_stop_sync(dpcm); mutex_lock(&loopback->cable_lock); free_cable(substream); mutex_unlock(&loopback->cable_lock); -- cgit v1.2.3 From 8e6b1a72a75bb5067ccb6b56d8ca4aa3a300a64e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Mar 2018 10:40:27 +0100 Subject: ALSA: aloop: Fix access to not-yet-ready substream via cable In loopback_open() and loopback_close(), we assign and release the substream object to the corresponding cable in a racy way. It's neither locked nor done in the right position. The open callback assigns the substream before its preparation finishes, hence the other side of the cable may pick it up, which may lead to the invalid memory access. This patch addresses these: move the assignment to the end of the open callback, and wrap with cable->lock for avoiding concurrent accesses. Cc: Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 0a08e63e9c66..1063a4377502 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -666,7 +666,9 @@ static void free_cable(struct snd_pcm_substream *substream) return; if (cable->streams[!substream->stream]) { /* other stream is still alive */ + spin_lock_irq(&cable->lock); cable->streams[substream->stream] = NULL; + spin_unlock_irq(&cable->lock); } else { /* free the cable */ loopback->cables[substream->number][dev] = NULL; @@ -705,7 +707,6 @@ static int loopback_open(struct snd_pcm_substream *substream) loopback->cables[substream->number][dev] = cable; } dpcm->cable = cable; - cable->streams[substream->stream] = dpcm; snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); @@ -737,6 +738,11 @@ static int loopback_open(struct snd_pcm_substream *substream) runtime->hw = loopback_pcm_hardware; else runtime->hw = cable->hw; + + spin_lock_irq(&cable->lock); + cable->streams[substream->stream] = dpcm; + spin_unlock_irq(&cable->lock); + unlock: if (err < 0) { free_cable(substream); -- cgit v1.2.3 From 19b558db12f9f4e45a22012bae7b4783e62224da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Feb 2018 17:21:55 +0100 Subject: posix-timers: Protect posix clock array access against speculation The clockid argument of clockid_to_kclock() comes straight from user space via various syscalls and is used as index into the posix_clocks array. Protect it against spectre v1 array out of bounds speculation. Remove the redundant check for !posix_clock[id] as this is another source for speculation and does not provide any advantage over the return posix_clock[id] path which returns NULL in that case anyway. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Acked-by: Dan Williams Cc: Rasmus Villemoes Cc: Greg KH Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: David Woodhouse Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1802151718320.1296@nanos.tec.linutronix.de --- kernel/time/posix-timers.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 75043046914e..10b7186d0638 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "timekeeping.h" #include "posix-timers.h" @@ -1346,11 +1347,15 @@ static const struct k_clock * const posix_clocks[] = { static const struct k_clock *clockid_to_kclock(const clockid_t id) { - if (id < 0) + clockid_t idx = id; + + if (id < 0) { return (id & CLOCKFD_MASK) == CLOCKFD ? &clock_posix_dynamic : &clock_posix_cpu; + } - if (id >= ARRAY_SIZE(posix_clocks) || !posix_clocks[id]) + if (id >= ARRAY_SIZE(posix_clocks)) return NULL; - return posix_clocks[id]; + + return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))]; } -- cgit v1.2.3 From 9a02d3af9ce9fcca751da1d4b5663b7a825d5957 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 20 Mar 2018 10:11:44 +0100 Subject: drm/tegra: dc: Use correct format array for Tegra124 Use tegra124_(primary|overlay)_formats for Tegra124, otherwise the count specified in the Tegra124 SoC info structure will be different from the array size and cause a crash. Fixes: 511c7023cf23 ("drm/tegra: dc: Support more formats") Signed-off-by: Stefan Agner Acked-by: Marcel Ziswiler Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index fbffe1948b3b..c1a5ae8faaef 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2009,9 +2009,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .coupled_pm = false, .has_nvdisplay = false, .num_primary_formats = ARRAY_SIZE(tegra124_primary_formats), - .primary_formats = tegra114_primary_formats, + .primary_formats = tegra124_primary_formats, .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats), - .overlay_formats = tegra114_overlay_formats, + .overlay_formats = tegra124_overlay_formats, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { -- cgit v1.2.3 From 5df7af85ecd88e8b5f1f31d6456c3cf38a8bbdda Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:52 +0800 Subject: net: phy: Add general dummy stubs for MMD register access For some phy devices, even though they don't support the MMD extended register access, it does have some side effect if we are trying to read/write the MMD registers via indirect method. So introduce general dummy stubs for MMD register access which these devices can use to avoid such side effect. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 17 +++++++++++++++++ include/linux/phy.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index fe16f5894092..74664a6c0cdc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1673,6 +1673,23 @@ int genphy_config_init(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_config_init); +/* This is used for the phy device which doesn't support the MMD extended + * register access, but it does have side effect when we are trying to access + * the MMD register via indirect method. + */ +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_read_mmd_unsupported); + +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_write_mmd_unsupported); + int genphy_suspend(struct phy_device *phydev) { return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN); diff --git a/include/linux/phy.h b/include/linux/phy.h index b260fb336b25..7c4c2379e010 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -984,6 +984,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, + u16 regnum); +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); -- cgit v1.2.3 From 0231b1a074c672f8c00da00a57144072890d816b Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:53 +0800 Subject: net: phy: realtek: Use the dummy stubs for MMD register access for rtl8211b The Ethernet on mpc8315erdb is broken since commit b6b5e8a69118 ("gianfar: Disable EEE autoneg by default"). The reason is that even though the rtl8211b doesn't support the MMD extended registers access, it does return some random values if we trying to access the MMD register via indirect method. This makes it seem that the EEE is supported by this phy device. And the subsequent writing to the MMD registers does cause the phy malfunction. So use the dummy stubs for the MMD register access to fix this issue. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index ee3ca4a2f12b..9f48ecf9c627 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -172,6 +172,8 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, + .read_mmd = &genphy_read_mmd_unsupported, + .write_mmd = &genphy_write_mmd_unsupported, }, { .phy_id = 0x001cc914, .name = "RTL8211DN Gigabit Ethernet", -- cgit v1.2.3 From c846a2b7bd0e6900a726afb7c0a066f3a93617cf Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:54 +0800 Subject: net: phy: micrel: Use the general dummy stubs for MMD register access The new general dummy stubs for MMD register access were introduced. Use that for the codes reuse. Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 49be85afbea9..f41b224a9cdb 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -635,25 +635,6 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev) return 0; } -/* This routine returns -1 as an indication to the caller that the - * Micrel ksz9021 10/100/1000 PHY does not support standard IEEE - * MMD extended PHY registers. - */ -static int -ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum) -{ - return -1; -} - -/* This routine does nothing since the Micrel ksz9021 does not support - * standard IEEE MMD extended PHY registers. - */ -static int -ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum, u16 val) -{ - return -1; -} - static int kszphy_get_sset_count(struct phy_device *phydev) { return ARRAY_SIZE(kszphy_hw_stats); @@ -946,8 +927,8 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .read_mmd = ksz9021_rd_mmd_phyreg, - .write_mmd = ksz9021_wr_mmd_phyreg, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, { .phy_id = PHY_ID_KSZ9031, .phy_id_mask = MICREL_PHY_ID_MASK, -- cgit v1.2.3 From 6be687395b3124f002a653c1a50b3260222b3cd7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:12 +0100 Subject: s390/qeth: free netdevice when removing a card On removal, a qeth card's netdevice is currently not properly freed because the call chain looks as follows: qeth_core_remove_device(card) lx_remove_device(card) unregister_netdev(card->dev) card->dev = NULL !!! qeth_core_free_card(card) if (card->dev) !!! free_netdev(card->dev) Fix it by free'ing the netdev straight after unregistering. This also fixes the sysfs-driven layer switch case (qeth_dev_layer2_store()), where the need to free the current netdevice was not considered at all. Note that free_netdev() takes care of the netif_napi_del() for us too. Fixes: 4a71df50047f ("qeth: new qeth device driver") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 -- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c8b308cfabf1..4a820afececd 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5087,8 +5087,6 @@ static void qeth_core_free_card(struct qeth_card *card) QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *)); qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); - if (card->dev) - free_netdev(card->dev); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); kfree(card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7f236440483f..5ef4c978ad19 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -915,8 +915,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } return; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 962a04b68dd2..b6b12220da71 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2865,8 +2865,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } -- cgit v1.2.3 From 1063e432bb45be209427ed3f1ca3908e4aa3c7d7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:13 +0100 Subject: s390/qeth: when thread completes, wake up all waiters qeth_wait_for_threads() is potentially called by multiple users, make sure to notify all of them after qeth_clear_thread_running_bit() adjusted the thread_running_mask. With no timeout, callers would otherwise stall. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4a820afececd..42ee8806fa53 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -960,7 +960,7 @@ void qeth_clear_thread_running_bit(struct qeth_card *card, unsigned long thread) spin_lock_irqsave(&card->thread_mask_lock, flags); card->thread_running_mask &= ~thread; spin_unlock_irqrestore(&card->thread_mask_lock, flags); - wake_up(&card->wait_q); + wake_up_all(&card->wait_q); } EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit); -- cgit v1.2.3 From 17bf8c9b3d499d5168537c98b61eb7a1fcbca6c2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:14 +0100 Subject: s390/qeth: lock read device while queueing next buffer For calling ccw_device_start(), issue_next_read() needs to hold the device's ccwlock. This is satisfied for the IRQ handler path (where qeth_irq() gets called under the ccwlock), but we need explicit locking for the initial call by the MPC initialization. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 42ee8806fa53..2a9afaf8f264 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -527,8 +527,7 @@ static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue) queue == card->qdio.no_in_queues - 1; } - -static int qeth_issue_next_read(struct qeth_card *card) +static int __qeth_issue_next_read(struct qeth_card *card) { int rc; struct qeth_cmd_buffer *iob; @@ -559,6 +558,17 @@ static int qeth_issue_next_read(struct qeth_card *card) return rc; } +static int qeth_issue_next_read(struct qeth_card *card) +{ + int ret; + + spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card))); + ret = __qeth_issue_next_read(card); + spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card))); + + return ret; +} + static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) { struct qeth_reply *reply; @@ -1182,7 +1192,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, return; if (channel == &card->read && channel->state == CH_STATE_UP) - qeth_issue_next_read(card); + __qeth_issue_next_read(card); iob = channel->iob; index = channel->buf_no; -- cgit v1.2.3 From a6c3d93963e4b333c764fde69802c3ea9eaa9d5c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:15 +0100 Subject: s390/qeth: on channel error, reject further cmd requests When the IRQ handler determines that one of the cmd IO channels has failed and schedules recovery, block any further cmd requests from being submitted. The request would inevitably stall, and prevent the recovery from making progress until the request times out. This sort of error was observed after Live Guest Relocation, where the pending IO on the READ channel intentionally gets terminated to kick-start recovery. Simultaneously the guest executed SIOCETHTOOL, triggering qeth to issue a QUERY CARD INFO command. The command then stalled in the inoperabel WRITE channel. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2a9afaf8f264..3653bea38470 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1174,6 +1174,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } rc = qeth_get_problem(cdev, irb); if (rc) { + card->read_or_write_problem = 1; qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; -- cgit v1.2.3 From 1bf9a7520fadaebfb8891284b046dd3fa6a2dc32 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:31 +0300 Subject: net: aquantia: Fix hardware reset when SPI may rarely hangup Under some circumstances (notably using thunderbolt interface) SPI on chip reset may be in active transaction. Here we forcibly cleanup SPI to prevent possible hangups. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 967f0fd07fcf..fcb3279ff9c7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -79,16 +79,15 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) { + u32 gsr, val; int k = 0; - u32 gsr; aq_hw_write_reg(self, 0x404, 0x40e1); AQ_HW_SLEEP(50); /* Cleanup SPI */ - aq_hw_write_reg(self, 0x534, 0xA0); - aq_hw_write_reg(self, 0x100, 0x9F); - aq_hw_write_reg(self, 0x100, 0x809F); + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR); aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000); @@ -97,7 +96,14 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) aq_hw_write_reg(self, 0x404, 0x80e0); aq_hw_write_reg(self, 0x32a8, 0x0); aq_hw_write_reg(self, 0x520, 0x1); + + /* Reset SPI again because of possible interrupted SPI burst */ + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); AQ_HW_SLEEP(10); + /* Clear SPI reset state */ + aq_hw_write_reg(self, 0x53C, val & ~0x10); + aq_hw_write_reg(self, 0x404, 0x180e0); for (k = 0; k < 1000; k++) { @@ -147,7 +153,7 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) { - u32 gsr, rbl_status; + u32 gsr, val, rbl_status; int k; aq_hw_write_reg(self, 0x404, 0x40e1); @@ -157,6 +163,10 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) /* Alter RBL status */ aq_hw_write_reg(self, 0x388, 0xDEAD); + /* Cleanup SPI */ + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); + /* Global software reset*/ hw_atl_rx_rx_reg_res_dis_set(self, 0U); hw_atl_tx_tx_reg_res_dis_set(self, 0U); -- cgit v1.2.3 From d0f0fb25d6c7a7c299d9bdaa2a11e96e4102e944 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:32 +0300 Subject: net: aquantia: Fix a regression with reset on old firmware FW 1.5.58 and below needs a fixed delay even after 0x18 register is filled. Otherwise, setting MPI_INIT state too fast causes traffic hang. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index fcb3279ff9c7..dcb27bc7e97c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -147,6 +147,8 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) aq_pr_err("FW kickstart failed\n"); return -EIO; } + /* Old FW requires fixed delay after init */ + AQ_HW_SLEEP(15); return 0; } @@ -214,6 +216,8 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) aq_pr_err("FW kickstart failed\n"); return -EIO; } + /* Old FW requires fixed delay after init */ + AQ_HW_SLEEP(15); return 0; } -- cgit v1.2.3 From 47203b3426a6d0ac8b7a96259ed6784158b6d74b Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:33 +0300 Subject: net: aquantia: Change inefficient wait loop on fw data reads B1 hardware changes behavior of mailbox interface, it has busy bit always raised. Data ready condition should be detected by increment of address register. Old code has empty `for` loop, and that caused cpu overloads on B1 hardware. aq_nic_service_timer_cb consumed ~100ms because of that. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 42 ++++++++++++++-------- .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 1 + 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index dcb27bc7e97c..d3b847ec7465 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -21,6 +21,10 @@ #define HW_ATL_UCP_0X370_REG 0x0370U +#define HW_ATL_MIF_CMD 0x0200U +#define HW_ATL_MIF_ADDR 0x0208U +#define HW_ATL_MIF_VAL 0x020CU + #define HW_ATL_FW_SM_RAM 0x2U #define HW_ATL_MPI_FW_VERSION 0x18 #define HW_ATL_MPI_CONTROL_ADR 0x0368U @@ -269,18 +273,22 @@ int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a, } } - aq_hw_write_reg(self, 0x00000208U, a); - - for (++cnt; --cnt;) { - u32 i = 0U; + aq_hw_write_reg(self, HW_ATL_MIF_ADDR, a); - aq_hw_write_reg(self, 0x00000200U, 0x00008000U); + for (++cnt; --cnt && !err;) { + aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U); - for (i = 1024U; - (0x100U & aq_hw_read_reg(self, 0x00000200U)) && --i;) { - } + if (IS_CHIP_FEATURE(REVISION_B1)) + AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self, + HW_ATL_MIF_ADDR), + 1, 1000U); + else + AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self, + HW_ATL_MIF_CMD)), + 1, 1000U); - *(p++) = aq_hw_read_reg(self, 0x0000020CU); + *(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL); + a += 4; } hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM); @@ -676,14 +684,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p) u32 val = hw_atl_reg_glb_mif_id_get(self); u32 mif_rev = val & 0xFFU; - if ((3U & mif_rev) == 1U) { - chip_features |= - HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 | + if ((0xFU & mif_rev) == 1U) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 | HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | HAL_ATLANTIC_UTILS_CHIP_MIPS; - } else if ((3U & mif_rev) == 2U) { - chip_features |= - HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 | + } else if ((0xFU & mif_rev) == 2U) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 | + HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | + HAL_ATLANTIC_UTILS_CHIP_MIPS | + HAL_ATLANTIC_UTILS_CHIP_TPO2 | + HAL_ATLANTIC_UTILS_CHIP_RPF2; + } else if ((0xFU & mif_rev) == 0xAU) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 | HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | HAL_ATLANTIC_UTILS_CHIP_MIPS | HAL_ATLANTIC_UTILS_CHIP_TPO2 | diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index 2c690947910a..cd8f18f39c61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -161,6 +161,7 @@ struct __packed hw_aq_atl_utils_mbox { #define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ 0x00000010U #define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 0x01000000U #define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 0x02000000U +#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 0x04000000U #define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \ self->chip_features) -- cgit v1.2.3 From b647d3980948e881e6bb9bd898465e675d5e8486 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:34 +0300 Subject: net: aquantia: Add tx clean budget and valid budget handling logic We should report to napi full budget only when we have more job to do. Before this fix, on any tx queue cleanup we forced napi to do poll again. Thats a waste of cpu resources and caused storming with napi polls when there was at least one tx on each interrupt. With this fix we report full budget only when there is more job on TX to do. Or, as before, when rx budget was fully consumed. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 2 ++ drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7 +++++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 11 +++++------ 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 0b49f1aeebd3..fc7383106946 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -36,6 +36,8 @@ #define AQ_CFG_TX_FRAME_MAX (16U * 1024U) #define AQ_CFG_RX_FRAME_MAX (4U * 1024U) +#define AQ_CFG_TX_CLEAN_BUDGET 256U + /* LRO */ #define AQ_CFG_IS_LRO_DEF 1U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0be6a11370bb..b5f1f62e8e25 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -136,11 +136,12 @@ void aq_ring_queue_stop(struct aq_ring_s *ring) netif_stop_subqueue(ndev, ring->idx); } -void aq_ring_tx_clean(struct aq_ring_s *self) +bool aq_ring_tx_clean(struct aq_ring_s *self) { struct device *dev = aq_nic_get_dev(self->aq_nic); + unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET; - for (; self->sw_head != self->hw_head; + for (; self->sw_head != self->hw_head && budget--; self->sw_head = aq_ring_next_dx(self, self->sw_head)) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; @@ -167,6 +168,8 @@ void aq_ring_tx_clean(struct aq_ring_s *self) buff->pa = 0U; buff->eop_index = 0xffffU; } + + return !!budget; } #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 965fae0fb6e0..ac1329f4051d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -153,7 +153,7 @@ void aq_ring_free(struct aq_ring_s *self); void aq_ring_update_queue_state(struct aq_ring_s *ring); void aq_ring_queue_wake(struct aq_ring_s *ring); void aq_ring_queue_stop(struct aq_ring_s *ring); -void aq_ring_tx_clean(struct aq_ring_s *self); +bool aq_ring_tx_clean(struct aq_ring_s *self); int aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, int *work_done, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f890b8a5a862..d335c334fa56 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -35,12 +35,12 @@ struct aq_vec_s { static int aq_vec_poll(struct napi_struct *napi, int budget) { struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi); + unsigned int sw_tail_old = 0U; struct aq_ring_s *ring = NULL; + bool was_tx_cleaned = true; + unsigned int i = 0U; int work_done = 0; int err = 0; - unsigned int i = 0U; - unsigned int sw_tail_old = 0U; - bool was_tx_cleaned = false; if (!self) { err = -EINVAL; @@ -57,9 +57,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (ring[AQ_VEC_TX_ID].sw_head != ring[AQ_VEC_TX_ID].hw_head) { - aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); + was_tx_cleaned = aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]); - was_tx_cleaned = true; } err = self->aq_hw_ops->hw_ring_rx_receive(self->aq_hw, @@ -90,7 +89,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) } } - if (was_tx_cleaned) + if (!was_tx_cleaned) work_done = budget; if (work_done < budget) { -- cgit v1.2.3 From 3e9a545131723ff12fe4304245c222157f0e4622 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:35 +0300 Subject: net: aquantia: Allow live mac address changes There is nothing prevents us from changing MAC on the running interface. Allow this with ndev priv flag. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ebbaf63eaf47..34120d5b7c03 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -247,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; + self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN; -- cgit v1.2.3 From 90869ddfefebb1a79bd7bebfa4f28baa9f8c82cd Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:36 +0300 Subject: net: aquantia: Implement pci shutdown callback We should close link and all NIC operations during shutdown. On some systems graceful reboot never closes NIC interface on its own, but only indicates pci device shutdown. Without explicit handler, NIC rx rings continued to transfer DMA data into prepared buffers while CPU rebooted already. That caused memory corruptions on soft reboot. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 20 ++++++++++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 1 + drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 15 +++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 34120d5b7c03..c96a92118b8b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -939,3 +939,23 @@ err_exit: out: return err; } + +void aq_nic_shutdown(struct aq_nic_s *self) +{ + int err = 0; + + if (!self->ndev) + return; + + rtnl_lock(); + + netif_device_detach(self->ndev); + + err = aq_nic_stop(self); + if (err < 0) + goto err_exit; + aq_nic_deinit(self); + +err_exit: + rtnl_unlock(); +} \ No newline at end of file diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index d16b0f1a95aa..219b550d1665 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -118,5 +118,6 @@ struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self); u32 aq_nic_get_fw_version(struct aq_nic_s *self); int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg); int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self); +void aq_nic_shutdown(struct aq_nic_s *self); #endif /* AQ_NIC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 87c4308b52a7..ecc6306f940f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -323,6 +323,20 @@ static void aq_pci_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void aq_pci_shutdown(struct pci_dev *pdev) +{ + struct aq_nic_s *self = pci_get_drvdata(pdev); + + aq_nic_shutdown(self); + + pci_disable_device(pdev); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } +} + static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg) { struct aq_nic_s *self = pci_get_drvdata(pdev); @@ -345,6 +359,7 @@ static struct pci_driver aq_pci_ops = { .remove = aq_pci_remove, .suspend = aq_pci_suspend, .resume = aq_pci_resume, + .shutdown = aq_pci_shutdown, }; module_pci_driver(aq_pci_ops); -- cgit v1.2.3 From c89bf1cd3498db851bdc184b26e26baaa5f141c5 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:37 +0300 Subject: net: aquantia: driver version bump Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/ver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 5265b937677b..a445de6837a6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -13,7 +13,7 @@ #define NIC_MAJOR_DRIVER_VERSION 2 #define NIC_MINOR_DRIVER_VERSION 0 #define NIC_BUILD_DRIVER_VERSION 2 -#define NIC_REVISION_DRIVER_VERSION 0 +#define NIC_REVISION_DRIVER_VERSION 1 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern" -- cgit v1.2.3 From 191f86ca8ef27f7a492fd1c03620498c6e94f0ac Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 20 Mar 2018 14:44:55 +0000 Subject: ipv6: sr: fix scheduling in RCU when creating seg6 lwtunnel state The seg6_build_state() function is called with RCU read lock held, so we cannot use GFP_KERNEL. This patch uses GFP_ATOMIC instead. [ 92.770271] ============================= [ 92.770628] WARNING: suspicious RCU usage [ 92.770921] 4.16.0-rc4+ #12 Not tainted [ 92.771277] ----------------------------- [ 92.771585] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section! [ 92.772279] [ 92.772279] other info that might help us debug this: [ 92.772279] [ 92.773067] [ 92.773067] rcu_scheduler_active = 2, debug_locks = 1 [ 92.773514] 2 locks held by ip/2413: [ 92.773765] #0: (rtnl_mutex){+.+.}, at: [<00000000e5461720>] rtnetlink_rcv_msg+0x441/0x4d0 [ 92.774377] #1: (rcu_read_lock){....}, at: [<00000000df4f161e>] lwtunnel_build_state+0x59/0x210 [ 92.775065] [ 92.775065] stack backtrace: [ 92.775371] CPU: 0 PID: 2413 Comm: ip Not tainted 4.16.0-rc4+ #12 [ 92.775791] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 [ 92.776608] Call Trace: [ 92.776852] dump_stack+0x7d/0xbc [ 92.777130] __schedule+0x133/0xf00 [ 92.777393] ? unwind_get_return_address_ptr+0x50/0x50 [ 92.777783] ? __sched_text_start+0x8/0x8 [ 92.778073] ? rcu_is_watching+0x19/0x30 [ 92.778383] ? kernel_text_address+0x49/0x60 [ 92.778800] ? __kernel_text_address+0x9/0x30 [ 92.779241] ? unwind_get_return_address+0x29/0x40 [ 92.779727] ? pcpu_alloc+0x102/0x8f0 [ 92.780101] _cond_resched+0x23/0x50 [ 92.780459] __mutex_lock+0xbd/0xad0 [ 92.780818] ? pcpu_alloc+0x102/0x8f0 [ 92.781194] ? seg6_build_state+0x11d/0x240 [ 92.781611] ? save_stack+0x9b/0xb0 [ 92.781965] ? __ww_mutex_wakeup_for_backoff+0xf0/0xf0 [ 92.782480] ? seg6_build_state+0x11d/0x240 [ 92.782925] ? lwtunnel_build_state+0x1bd/0x210 [ 92.783393] ? ip6_route_info_create+0x687/0x1640 [ 92.783846] ? ip6_route_add+0x74/0x110 [ 92.784236] ? inet6_rtm_newroute+0x8a/0xd0 Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bd6cc688bd19..8367b859a934 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -418,7 +418,7 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); - err = dst_cache_init(&slwt->cache, GFP_KERNEL); + err = dst_cache_init(&slwt->cache, GFP_ATOMIC); if (err) { kfree(newts); return err; -- cgit v1.2.3 From 8936ef7604c11b5d701580d779e0f5684abc7b68 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 20 Mar 2018 14:44:56 +0000 Subject: ipv6: sr: fix NULL pointer dereference when setting encap source address When using seg6 in encap mode, we call ipv6_dev_get_saddr() to set the source address of the outer IPv6 header, in case none was specified. Using skb->dev can lead to BUG() when it is in an inconsistent state. This patch uses the net_device attached to the skb's dst instead. [940807.667429] BUG: unable to handle kernel NULL pointer dereference at 000000000000047c [940807.762427] IP: ipv6_dev_get_saddr+0x8b/0x1d0 [940807.815725] PGD 0 P4D 0 [940807.847173] Oops: 0000 [#1] SMP PTI [940807.890073] Modules linked in: [940807.927765] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G W 4.16.0-rc1-seg6bpf+ #2 [940808.028988] Hardware name: HP ProLiant DL120 G6/ProLiant DL120 G6, BIOS O26 09/06/2010 [940808.128128] RIP: 0010:ipv6_dev_get_saddr+0x8b/0x1d0 [940808.187667] RSP: 0018:ffff88043fd836b0 EFLAGS: 00010206 [940808.251366] RAX: 0000000000000005 RBX: ffff88042cb1c860 RCX: 00000000000000fe [940808.338025] RDX: 00000000000002c0 RSI: ffff88042cb1c860 RDI: 0000000000004500 [940808.424683] RBP: ffff88043fd83740 R08: 0000000000000000 R09: ffffffffffffffff [940808.511342] R10: 0000000000000040 R11: 0000000000000000 R12: ffff88042cb1c850 [940808.598012] R13: ffffffff8208e380 R14: ffff88042ac8da00 R15: 0000000000000002 [940808.684675] FS: 0000000000000000(0000) GS:ffff88043fd80000(0000) knlGS:0000000000000000 [940808.783036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [940808.852975] CR2: 000000000000047c CR3: 00000004255fe000 CR4: 00000000000006e0 [940808.939634] Call Trace: [940808.970041] [940808.995250] ? ip6t_do_table+0x265/0x640 [940809.043341] seg6_do_srh_encap+0x28f/0x300 [940809.093516] ? seg6_do_srh+0x1a0/0x210 [940809.139528] seg6_do_srh+0x1a0/0x210 [940809.183462] seg6_output+0x28/0x1e0 [940809.226358] lwtunnel_output+0x3f/0x70 [940809.272370] ip6_xmit+0x2b8/0x530 [940809.313185] ? ac6_proc_exit+0x20/0x20 [940809.359197] inet6_csk_xmit+0x7d/0xc0 [940809.404173] tcp_transmit_skb+0x548/0x9a0 [940809.453304] __tcp_retransmit_skb+0x1a8/0x7a0 [940809.506603] ? ip6_default_advmss+0x40/0x40 [940809.557824] ? tcp_current_mss+0x24/0x90 [940809.605925] tcp_retransmit_skb+0xd/0x80 [940809.654016] tcp_xmit_retransmit_queue.part.17+0xf9/0x210 [940809.719797] tcp_ack+0xa47/0x1110 [940809.760612] tcp_rcv_established+0x13c/0x570 [940809.812865] tcp_v6_do_rcv+0x151/0x3d0 [940809.858879] tcp_v6_rcv+0xa5c/0xb10 [940809.901770] ? seg6_output+0xdd/0x1e0 [940809.946745] ip6_input_finish+0xbb/0x460 [940809.994837] ip6_input+0x74/0x80 [940810.034612] ? ip6_rcv_finish+0xb0/0xb0 [940810.081663] ipv6_rcv+0x31c/0x4c0 ... Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Reported-by: Tom Herbert Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8367b859a934..7a78dcfda68a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; int hdrlen, tot_len, err; @@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { -- cgit v1.2.3 From 6d066734e9f09cdea4a3b9cb76136db3f29cfb02 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 20 Mar 2018 16:49:26 +0100 Subject: ppp: avoid loop in xmit recursion detection code We already detect situations where a PPP channel sends packets back to its upper PPP device. While this is enough to avoid deadlocking on xmit locks, this doesn't prevent packets from looping between the channel and the unit. The problem is that ppp_start_xmit() enqueues packets in ppp->file.xq before checking for xmit recursion. Therefore, __ppp_xmit_process() might dequeue a packet from ppp->file.xq and send it on the channel which, in turn, loops it back on the unit. Then ppp_start_xmit() queues the packet back to ppp->file.xq and __ppp_xmit_process() picks it up and sends it again through the channel. Therefore, the packet will loop between __ppp_xmit_process() and ppp_start_xmit() until some other part of the xmit path drops it. For L2TP, we rapidly fill the skb's headroom and pppol2tp_xmit() drops the packet after a few iterations. But PPTP reallocates the headroom if necessary, letting the loop run and exhaust the machine resources (as reported in https://bugzilla.kernel.org/show_bug.cgi?id=199109). Fix this by letting __ppp_xmit_process() enqueue the skb to ppp->file.xq, so that we can check for recursion before adding it to the queue. Now ppp_xmit_process() can drop the packet when recursion is detected. __ppp_channel_push() is a bit special. It calls __ppp_xmit_process() without having any actual packet to send. This is used by ppp_output_wakeup() to re-enable transmission on the parent unit (for implementations like ppp_async.c, where the .start_xmit() function might not consume the skb, leaving it in ppp->xmit_pending and disabling transmission). Therefore, __ppp_xmit_process() needs to handle the case where skb is NULL, dequeuing as many packets as possible from ppp->file.xq. Reported-by: xu heng Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fa2a9bdd1866..da1937832c99 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -257,7 +257,7 @@ struct ppp_net { /* Prototypes. */ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); -static void ppp_xmit_process(struct ppp *ppp); +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb); static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); static void ppp_push(struct ppp *ppp); static void ppp_channel_push(struct channel *pch); @@ -513,13 +513,12 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, goto out; } - skb_queue_tail(&pf->xq, skb); - switch (pf->kind) { case INTERFACE: - ppp_xmit_process(PF_TO_PPP(pf)); + ppp_xmit_process(PF_TO_PPP(pf), skb); break; case CHANNEL: + skb_queue_tail(&pf->xq, skb); ppp_channel_push(PF_TO_CHANNEL(pf)); break; } @@ -1267,8 +1266,8 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) put_unaligned_be16(proto, pp); skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev))); - skb_queue_tail(&ppp->file.xq, skb); - ppp_xmit_process(ppp); + ppp_xmit_process(ppp, skb); + return NETDEV_TX_OK; outf: @@ -1420,13 +1419,14 @@ static void ppp_setup(struct net_device *dev) */ /* Called to do any work queued up on the transmit side that can now be done */ -static void __ppp_xmit_process(struct ppp *ppp) +static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { - struct sk_buff *skb; - ppp_xmit_lock(ppp); if (!ppp->closing) { ppp_push(ppp); + + if (skb) + skb_queue_tail(&ppp->file.xq, skb); while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))) ppp_send_frame(ppp, skb); @@ -1440,7 +1440,7 @@ static void __ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } -static void ppp_xmit_process(struct ppp *ppp) +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { local_bh_disable(); @@ -1448,7 +1448,7 @@ static void ppp_xmit_process(struct ppp *ppp) goto err; (*this_cpu_ptr(ppp->xmit_recursion))++; - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, skb); (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1458,6 +1458,8 @@ static void ppp_xmit_process(struct ppp *ppp) err: local_bh_enable(); + kfree_skb(skb); + if (net_ratelimit()) netdev_err(ppp->dev, "recursion detected\n"); } @@ -1942,7 +1944,7 @@ static void __ppp_channel_push(struct channel *pch) if (skb_queue_empty(&pch->file.xq)) { ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, NULL); } } -- cgit v1.2.3 From 68e2ffdeb5dbf54bc3a0684aa4e73c6db8675eed Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 20 Mar 2018 10:06:59 -0700 Subject: net/ipv6: Handle onlink flag with multipath routes For multipath routes the ONLINK flag can be specified per nexthop in rtnh_flags or globally in rtm_flags. Update ip6_route_multipath_add to consider the ONLINK setting coming from rtnh_flags. Each loop over nexthops the config for the sibling route is initialized to the global config and then per nexthop settings overlayed. The flag is 'or'ed into fib6_config to handle the ONLINK flag coming from either rtm_flags or rtnh_flags. Fixes: fc1e64e1092f ("net/ipv6: Add support for onlink flag") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b3c1137ad0b8..b0d5c64e1978 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4112,6 +4112,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, r_cfg.fc_encap_type = nla_get_u16(nla); } + r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); rt = ip6_route_info_create(&r_cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); -- cgit v1.2.3 From 8348e0460ab1473f06c8b824699dd2eed3c1979d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:02 -0700 Subject: hv_netvsc: disable NAPI before channel close This makes sure that no CPU is still process packets when the channel is closed. Fixes: 76bb5db5c749 ("netvsc: fix use after free on module removal") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0265d703eb03..e70a44273f55 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -568,6 +568,10 @@ void netvsc_device_remove(struct hv_device *device) RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + /* And disassociate NAPI context from device */ + for (i = 0; i < net_device->num_chn; i++) + netif_napi_del(&net_device->chan_table[i].napi); + /* * At this point, no one should be accessing net_device * except in here @@ -579,10 +583,6 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_gpadl(device, net_device); - /* And dissassociate NAPI context from device */ - for (i = 0; i < net_device->num_chn; i++) - netif_napi_del(&net_device->chan_table[i].napi); - /* Release all resources */ free_netvsc_device_rcu(net_device); } -- cgit v1.2.3 From 02400fcee2542ee334a2394e0d9f6efd969fe782 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:03 -0700 Subject: hv_netvsc: use RCU to fix concurrent rx and queue changes The receive processing may continue to happen while the internal network device state is in RCU grace period. The internal RNDIS structure is associated with the internal netvsc_device structure; both have the same RCU lifetime. Defer freeing all associated parts until after grace period. Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 17 +++++------------ drivers/net/hyperv/rndis_filter.c | 39 ++++++++++++++++----------------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e70a44273f55..12c044baf1af 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -90,6 +90,11 @@ static void free_netvsc_device(struct rcu_head *head) = container_of(head, struct netvsc_device, rcu); int i; + kfree(nvdev->extension); + vfree(nvdev->recv_buf); + vfree(nvdev->send_buf); + kfree(nvdev->send_section_map); + for (i = 0; i < VRSS_CHANNEL_MAX; i++) vfree(nvdev->chan_table[i].mrc.slots); @@ -211,12 +216,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device, net_device->recv_buf_gpadl_handle = 0; } - if (net_device->recv_buf) { - /* Free up the receive buffer */ - vfree(net_device->recv_buf); - net_device->recv_buf = NULL; - } - if (net_device->send_buf_gpadl_handle) { ret = vmbus_teardown_gpadl(device->channel, net_device->send_buf_gpadl_handle); @@ -231,12 +230,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device, } net_device->send_buf_gpadl_handle = 0; } - if (net_device->send_buf) { - /* Free up the send buffer */ - vfree(net_device->send_buf); - net_device->send_buf = NULL; - } - kfree(net_device->send_section_map); } int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 00ec80c23fe5..963314eb3226 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -264,13 +264,23 @@ static void rndis_set_link_state(struct rndis_device *rdev, } } -static void rndis_filter_receive_response(struct rndis_device *dev, - struct rndis_message *resp) +static void rndis_filter_receive_response(struct net_device *ndev, + struct netvsc_device *nvdev, + const struct rndis_message *resp) { + struct rndis_device *dev = nvdev->extension; struct rndis_request *request = NULL; bool found = false; unsigned long flags; - struct net_device *ndev = dev->ndev; + + /* This should never happen, it means control message + * response received after device removed. + */ + if (dev->state == RNDIS_DEV_UNINITIALIZED) { + netdev_err(ndev, + "got rndis message uninitialized\n"); + return; + } spin_lock_irqsave(&dev->request_lock, flags); list_for_each_entry(request, &dev->req_list, list_ent) { @@ -352,7 +362,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) static int rndis_filter_receive_data(struct net_device *ndev, struct netvsc_device *nvdev, - struct rndis_device *dev, struct rndis_message *msg, struct vmbus_channel *channel, void *data, u32 data_buflen) @@ -372,7 +381,7 @@ static int rndis_filter_receive_data(struct net_device *ndev, * should be the data packet size plus the trailer padding size */ if (unlikely(data_buflen < rndis_pkt->data_len)) { - netdev_err(dev->ndev, "rndis message buffer " + netdev_err(ndev, "rndis message buffer " "overflow detected (got %u, min %u)" "...dropping this message!\n", data_buflen, rndis_pkt->data_len); @@ -400,35 +409,20 @@ int rndis_filter_receive(struct net_device *ndev, void *data, u32 buflen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct rndis_device *rndis_dev = net_dev->extension; struct rndis_message *rndis_msg = data; - /* Make sure the rndis device state is initialized */ - if (unlikely(!rndis_dev)) { - netif_dbg(net_device_ctx, rx_err, ndev, - "got rndis message but no rndis device!\n"); - return NVSP_STAT_FAIL; - } - - if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { - netif_dbg(net_device_ctx, rx_err, ndev, - "got rndis message uninitialized\n"); - return NVSP_STAT_FAIL; - } - if (netif_msg_rx_status(net_device_ctx)) dump_rndis_message(ndev, rndis_msg); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: - return rndis_filter_receive_data(ndev, net_dev, - rndis_dev, rndis_msg, + return rndis_filter_receive_data(ndev, net_dev, rndis_msg, channel, data, buflen); case RNDIS_MSG_INIT_C: case RNDIS_MSG_QUERY_C: case RNDIS_MSG_SET_C: /* completion msgs */ - rndis_filter_receive_response(rndis_dev, rndis_msg); + rndis_filter_receive_response(ndev, net_dev, rndis_msg); break; case RNDIS_MSG_INDICATE: @@ -1357,7 +1351,6 @@ void rndis_filter_device_remove(struct hv_device *dev, net_dev->extension = NULL; netvsc_device_remove(dev); - kfree(rndis_dev); } int rndis_filter_open(struct netvsc_device *nvdev) -- cgit v1.2.3 From 0ef58b0a05c127762f975c3dfe8b922e4aa87a29 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:04 -0700 Subject: hv_netvsc: change GPAD teardown order on older versions On older versions of Windows, the host ignores messages after vmbus channel is closed. Workaround this by doing what Windows does and send the teardown before close on older versions of NVSP protocol. Reported-by: Mohammed Gamal Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 12c044baf1af..37b0a30d6b03 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -571,10 +571,15 @@ void netvsc_device_remove(struct hv_device *device) */ netdev_dbg(ndev, "net device safe to remove\n"); + /* older versions require that buffer be revoked before close */ + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4) + netvsc_teardown_gpadl(device, net_device); + /* Now, we can close the channel safely */ vmbus_close(device->channel); - netvsc_teardown_gpadl(device, net_device); + if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4) + netvsc_teardown_gpadl(device, net_device); /* Release all resources */ free_netvsc_device_rcu(net_device); -- cgit v1.2.3 From 7b2ee50c0cd513a176a26a71f2989facdd75bfea Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:05 -0700 Subject: hv_netvsc: common detach logic Make common function for detaching internals of device during changes to MTU and RSS. Make sure no more packets are transmitted and all packets have been received before doing device teardown. Change the wait logic to be common and use usleep_range(). Changes transmit enabling logic so that transmit queues are disabled during the period when lower device is being changed. And enabled only after sub channels are setup. This avoids issue where it could be that a packet was being sent while subchannel was not initialized. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 20 +-- drivers/net/hyperv/netvsc_drv.c | 278 +++++++++++++++++++++----------------- drivers/net/hyperv/rndis_filter.c | 17 +-- 4 files changed, 173 insertions(+), 143 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cd538d5a7986..32861036c3fc 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -212,7 +212,6 @@ void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); void rndis_set_subchannel(struct work_struct *w); -bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 37b0a30d6b03..7472172823f3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -555,8 +555,6 @@ void netvsc_device_remove(struct hv_device *device) = rtnl_dereference(net_device_ctx->nvdev); int i; - cancel_work_sync(&net_device->subchan_work); - netvsc_revoke_buf(device, net_device); RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); @@ -643,14 +641,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, queue_sends = atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); - if (net_device->destroy && queue_sends == 0) - wake_up(&net_device->wait_drain); + if (unlikely(net_device->destroy)) { + if (queue_sends == 0) + wake_up(&net_device->wait_drain); + } else { + struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); - if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || - queue_sends < 1)) { - netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); - ndev_ctx->eth_stats.wake_queue++; + if (netif_tx_queue_stopped(txq) && + (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || + queue_sends < 1)) { + netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; + } } } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index faea0be18924..f28c85d212ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -46,7 +46,10 @@ #include "hyperv_net.h" -#define RING_SIZE_MIN 64 +#define RING_SIZE_MIN 64 +#define RETRY_US_LO 5000 +#define RETRY_US_HI 10000 +#define RETRY_MAX 2000 /* >10 sec */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) @@ -123,10 +126,8 @@ static int netvsc_open(struct net_device *net) } rdev = nvdev->extension; - if (!rdev->link_state) { + if (!rdev->link_state) netif_carrier_on(net); - netif_tx_wake_all_queues(net); - } if (vf_netdev) { /* Setting synthetic device up transparently sets @@ -142,36 +143,25 @@ static int netvsc_open(struct net_device *net) return 0; } -static int netvsc_close(struct net_device *net) +static int netvsc_wait_until_empty(struct netvsc_device *nvdev) { - struct net_device_context *net_device_ctx = netdev_priv(net); - struct net_device *vf_netdev - = rtnl_dereference(net_device_ctx->vf_netdev); - struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); - int ret = 0; - u32 aread, i, msec = 10, retry = 0, retry_max = 20; - struct vmbus_channel *chn; - - netif_tx_disable(net); - - /* No need to close rndis filter if it is removed already */ - if (!nvdev) - goto out; - - ret = rndis_filter_close(nvdev); - if (ret != 0) { - netdev_err(net, "unable to close device (ret %d).\n", ret); - return ret; - } + unsigned int retry = 0; + int i; /* Ensure pending bytes in ring are read */ - while (true) { - aread = 0; + for (;;) { + u32 aread = 0; + for (i = 0; i < nvdev->num_chn; i++) { - chn = nvdev->chan_table[i].channel; + struct vmbus_channel *chn + = nvdev->chan_table[i].channel; + if (!chn) continue; + /* make sure receive not running now */ + napi_synchronize(&nvdev->chan_table[i].napi); + aread = hv_get_bytes_to_read(&chn->inbound); if (aread) break; @@ -181,22 +171,40 @@ static int netvsc_close(struct net_device *net) break; } - retry++; - if (retry > retry_max || aread == 0) - break; + if (aread == 0) + return 0; - msleep(msec); + if (++retry > RETRY_MAX) + return -ETIMEDOUT; - if (msec < 1000) - msec *= 2; + usleep_range(RETRY_US_LO, RETRY_US_HI); } +} - if (aread) { - netdev_err(net, "Ring buffer not empty after closing rndis\n"); - ret = -ETIMEDOUT; +static int netvsc_close(struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev + = rtnl_dereference(net_device_ctx->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + int ret; + + netif_tx_disable(net); + + /* No need to close rndis filter if it is removed already */ + if (!nvdev) + return 0; + + ret = rndis_filter_close(nvdev); + if (ret != 0) { + netdev_err(net, "unable to close device (ret %d).\n", ret); + return ret; } -out: + ret = netvsc_wait_until_empty(nvdev); + if (ret) + netdev_err(net, "Ring buffer not empty after closing rndis\n"); + if (vf_netdev) dev_close(vf_netdev); @@ -845,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net, } } +static int netvsc_detach(struct net_device *ndev, + struct netvsc_device *nvdev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hdev = ndev_ctx->device_ctx; + int ret; + + /* Don't try continuing to try and setup sub channels */ + if (cancel_work_sync(&nvdev->subchan_work)) + nvdev->num_chn = 1; + + /* If device was up (receiving) then shutdown */ + if (netif_running(ndev)) { + netif_tx_disable(ndev); + + ret = rndis_filter_close(nvdev); + if (ret) { + netdev_err(ndev, + "unable to close device (ret %d).\n", ret); + return ret; + } + + ret = netvsc_wait_until_empty(nvdev); + if (ret) { + netdev_err(ndev, + "Ring buffer not empty after closing rndis\n"); + return ret; + } + } + + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return 0; +} + +static int netvsc_attach(struct net_device *ndev, + struct netvsc_device_info *dev_info) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hdev = ndev_ctx->device_ctx; + struct netvsc_device *nvdev; + struct rndis_device *rdev; + int ret; + + nvdev = rndis_filter_device_add(hdev, dev_info); + if (IS_ERR(nvdev)) + return PTR_ERR(nvdev); + + /* Note: enable and attach happen when sub-channels setup */ + + netif_carrier_off(ndev); + + if (netif_running(ndev)) { + ret = rndis_filter_open(nvdev); + if (ret) + return ret; + + rdev = nvdev->extension; + if (!rdev->link_state) + netif_carrier_on(ndev); + } + + return 0; +} + static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); - struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); unsigned int orig, count = channels->combined_count; struct netvsc_device_info device_info; - bool was_opened; - int ret = 0; + int ret; /* We do not support separate count for rx, tx, or other */ if (count == 0 || @@ -871,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net, return -EINVAL; orig = nvdev->num_chn; - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = count; @@ -882,28 +952,17 @@ static int netvsc_set_channels(struct net_device *net, device_info.recv_sections = nvdev->recv_section_cnt; device_info.recv_section_size = nvdev->recv_section_size; - rndis_filter_device_remove(dev, nvdev); + ret = netvsc_detach(net, nvdev); + if (ret) + return ret; - nvdev = rndis_filter_device_add(dev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); + ret = netvsc_attach(net, &device_info); + if (ret) { device_info.num_chn = orig; - nvdev = rndis_filter_device_add(dev, &device_info); - - if (IS_ERR(nvdev)) { - netdev_err(net, "restoring channel setting failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } + if (netvsc_attach(net, &device_info)) + netdev_err(net, "restoring channel setting failed\n"); } - if (was_opened) - rndis_filter_open(nvdev); - - /* We may have missed link change notifications */ - net_device_ctx->last_reconfig = 0; - schedule_delayed_work(&net_device_ctx->dwork, 0); - return ret; } @@ -969,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct net_device_context *ndevctx = netdev_priv(ndev); struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); - struct hv_device *hdev = ndevctx->device_ctx; int orig_mtu = ndev->mtu; struct netvsc_device_info device_info; - bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) @@ -985,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) return ret; } - netif_device_detach(ndev); - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); - memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = nvdev->num_chn; device_info.send_sections = nvdev->send_section_cnt; @@ -997,35 +1049,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) device_info.recv_sections = nvdev->recv_section_cnt; device_info.recv_section_size = nvdev->recv_section_size; - rndis_filter_device_remove(hdev, nvdev); + ret = netvsc_detach(ndev, nvdev); + if (ret) + goto rollback_vf; ndev->mtu = mtu; - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); - - /* Attempt rollback to original MTU */ - ndev->mtu = orig_mtu; - nvdev = rndis_filter_device_add(hdev, &device_info); - - if (vf_netdev) - dev_set_mtu(vf_netdev, orig_mtu); - - if (IS_ERR(nvdev)) { - netdev_err(ndev, "restoring mtu failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } - } + ret = netvsc_attach(ndev, &device_info); + if (ret) + goto rollback; - if (was_opened) - rndis_filter_open(nvdev); + return 0; - netif_device_attach(ndev); +rollback: + /* Attempt rollback to original MTU */ + ndev->mtu = orig_mtu; - /* We may have missed link change notifications */ - schedule_delayed_work(&ndevctx->dwork, 0); + if (netvsc_attach(ndev, &device_info)) + netdev_err(ndev, "restoring mtu failed\n"); +rollback_vf: + if (vf_netdev) + dev_set_mtu(vf_netdev, orig_mtu); return ret; } @@ -1531,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev, { struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); - struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; struct ethtool_ringparam orig; u32 new_tx, new_rx; - bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) @@ -1560,34 +1602,18 @@ static int netvsc_set_ringparam(struct net_device *ndev, device_info.recv_sections = new_rx; device_info.recv_section_size = nvdev->recv_section_size; - netif_device_detach(ndev); - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); - - rndis_filter_device_remove(hdev, nvdev); - - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); + ret = netvsc_detach(ndev, nvdev); + if (ret) + return ret; + ret = netvsc_attach(ndev, &device_info); + if (ret) { device_info.send_sections = orig.tx_pending; device_info.recv_sections = orig.rx_pending; - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - netdev_err(ndev, "restoring ringparam failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } - } - if (was_opened) - rndis_filter_open(nvdev); - netif_device_attach(ndev); - - /* We may have missed link change notifications */ - ndevctx->last_reconfig = 0; - schedule_delayed_work(&ndevctx->dwork, 0); + if (netvsc_attach(ndev, &device_info)) + netdev_err(ndev, "restoring ringparam failed"); + } return ret; } @@ -2072,8 +2098,8 @@ no_net: static int netvsc_remove(struct hv_device *dev) { struct net_device_context *ndev_ctx; - struct net_device *vf_netdev; - struct net_device *net; + struct net_device *vf_netdev, *net; + struct netvsc_device *nvdev; net = hv_get_drvdata(dev); if (net == NULL) { @@ -2083,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev) ndev_ctx = netdev_priv(net); - netif_device_detach(net); - cancel_delayed_work_sync(&ndev_ctx->dwork); + rcu_read_lock(); + nvdev = rcu_dereference(ndev_ctx->nvdev); + + if (nvdev) + cancel_work_sync(&nvdev->subchan_work); + /* * Call to the vsc driver to let it know that the device is being * removed. Also blocks mtu and channel changes. @@ -2096,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev) if (vf_netdev) netvsc_unregister_vf(vf_netdev); + if (nvdev) + rndis_filter_device_remove(dev, nvdev); + unregister_netdevice(net); - rndis_filter_device_remove(dev, - rtnl_dereference(ndev_ctx->nvdev)); rtnl_unlock(); + rcu_read_unlock(); hv_set_drvdata(dev, NULL); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 963314eb3226..a6ec41c399d6 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1118,6 +1118,7 @@ void rndis_set_subchannel(struct work_struct *w) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ndev_ctx->tx_table[i] = i % nvdev->num_chn; + netif_device_attach(ndev); rtnl_unlock(); return; @@ -1128,6 +1129,8 @@ failed: nvdev->max_chn = 1; nvdev->num_chn = 1; + + netif_device_attach(ndev); unlock: rtnl_unlock(); } @@ -1330,6 +1333,10 @@ out: net_device->num_chn = 1; } + /* No sub channels, device is ready */ + if (net_device->num_chn == 1) + netif_device_attach(net); + return net_device; err_dev_remv: @@ -1342,9 +1349,6 @@ void rndis_filter_device_remove(struct hv_device *dev, { struct rndis_device *rndis_dev = net_dev->extension; - /* Don't try and setup sub channels if about to halt */ - cancel_work_sync(&net_dev->subchan_work); - /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); @@ -1368,10 +1372,3 @@ int rndis_filter_close(struct netvsc_device *nvdev) return rndis_filter_close_device(nvdev->extension); } - -bool rndis_filter_opened(const struct netvsc_device *nvdev) -{ - const struct rndis_device *dev = nvdev->extension; - - return dev->state == RNDIS_DEV_DATAINITIALIZED; -} -- cgit v1.2.3 From 40013ff20b1beed31184935fc0aea6a859d4d4ef Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 20 Mar 2018 17:31:10 -0700 Subject: net: dsa: Fix functional dsa-loop dependency on FIXED_PHY We have a functional dependency on the FIXED_PHY MDIO bus because we register fixed PHY devices "the old way" which only works if the code that does this has had a chance to run before the fixed MDIO bus is probed. Make sure we account for that and have dsa_loop_bdinfo.o be either built-in or modular depending on whether CONFIG_FIXED_PHY reflects that too. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index d040aeb45172..15c2a831edf1 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm-sf2.o bcm-sf2-objs := bcm_sf2.o bcm_sf2_cfp.o -obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o dsa_loop_bdinfo.o +obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o +ifdef CONFIG_NET_DSA_LOOP +obj-$(CONFIG_FIXED_PHY) += dsa_loop_bdinfo.o +endif obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o -- cgit v1.2.3 From 5dcd8400884cc4a043a6d4617e042489e5d566a9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Mar 2018 11:09:01 +0300 Subject: macsec: missing dev_put() on error in macsec_newlink() We moved the dev_hold(real_dev); call earlier in the function but forgot to update the error paths. Fixes: 0759e552bce7 ("macsec: fix negative refcnt on parent link") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/macsec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 7de88b33d5b9..9cbb0c8a896a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3277,7 +3277,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) - goto unregister; + goto put_dev; /* need to be already registered so that ->init has run and * the MAC addr is set @@ -3316,7 +3316,8 @@ del_dev: macsec_del_dev(macsec); unlink: netdev_upper_dev_unlink(real_dev, dev); -unregister: +put_dev: + dev_put(real_dev); unregister_netdevice(dev); return err; } -- cgit v1.2.3 From ff6781fd1bb404d8a551c02c35c70cec1da17ff1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Mar 2018 12:22:28 +1000 Subject: powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened force_external_irq_replay() can be called in the do_IRQ path with interrupts hard enabled and soft disabled if may_hard_irq_enable() set MSR[EE]=1. It updates local_paca->irq_happened with a load, modify, store sequence. If a maskable interrupt hits during this sequence, it will go to the masked handler to be marked pending in irq_happened. This update will be lost when the interrupt returns and the store instruction executes. This can result in unpredictable latencies, timeouts, lockups, etc. Fix this by ensuring hard interrupts are disabled before modifying irq_happened. This could cause any maskable asynchronous interrupt to get lost, but it was noticed on P9 SMP system doing RDMA NVMe target over 100GbE, so very high external interrupt rate and high IPI rate. The hang was bisected down to enabling doorbell interrupts for IPIs. These provided an interrupt type that could run at high rates in the do_IRQ path, stressing the race. Fixes: 1d607bb3bd60 ("powerpc/irq: Add mechanism to force a replay of interrupts") Cc: stable@vger.kernel.org # v4.8+ Reported-by: Carol L. Soto Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f88038847790..061aa0f47bb1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -476,6 +476,14 @@ void force_external_irq_replay(void) */ WARN_ON(!arch_irqs_disabled()); + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* Indicate in the PACA that we have an interrupt to replay */ local_paca->irq_happened |= PACA_IRQ_EE; } -- cgit v1.2.3 From 5a9f698feb11b198f17b2acebbfe0e2716a3beed Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Mon, 12 Mar 2018 11:40:23 +0800 Subject: drm/ast: Fixed 1280x800 Display Issue The original ast driver cannot display properly if the resolution is 1280x800 and the pixel clock is 83.5MHz. Here is the update to fix it. Signed-off-by: Y.C. Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index 5f4c2e833a65..d665dd5af5dd 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -97,7 +97,7 @@ static const struct ast_vbios_dclk_info dclk_table[] = { {0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */ {0x6A, 0x22, 0x00}, /* 0F: VCLK162 */ {0x4d, 0x4c, 0x80}, /* 10: VCLK154 */ - {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */ + {0x68, 0x6f, 0x80}, /* 11: VCLK83.5 */ {0x28, 0x49, 0x80}, /* 12: VCLK106.5 */ {0x37, 0x49, 0x80}, /* 13: VCLK146.25 */ {0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */ @@ -127,7 +127,7 @@ static const struct ast_vbios_dclk_info dclk_table_ast2500[] = { {0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */ {0x6A, 0x22, 0x00}, /* 0F: VCLK162 */ {0x4d, 0x4c, 0x80}, /* 10: VCLK154 */ - {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */ + {0x68, 0x6f, 0x80}, /* 11: VCLK83.5 */ {0x28, 0x49, 0x80}, /* 12: VCLK106.5 */ {0x37, 0x49, 0x80}, /* 13: VCLK146.25 */ {0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */ -- cgit v1.2.3 From 8970a63e965b43288c4f5f40efbc2bbf80de7f16 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Thu, 22 Mar 2018 16:17:02 -0700 Subject: mm/mempolicy.c: avoid use uninitialized preferred_node Alexander reported a use of uninitialized memory in __mpol_equal(), which is caused by incorrect use of preferred_node. When mempolicy in mode MPOL_PREFERRED with flags MPOL_F_LOCAL, it uses numa_node_id() instead of preferred_node, however, __mpol_equal() uses preferred_node without checking whether it is MPOL_F_LOCAL or not. [akpm@linux-foundation.org: slight comment tweak] Link: http://lkml.kernel.org/r/4ebee1c2-57f6-bcb8-0e2d-1833d1ee0bb7@huawei.com Fixes: fc36b8d3d819 ("mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy") Signed-off-by: Yisheng Xie Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Reviewed-by: Andrew Morton Cc: Dmitriy Vyukov Cc: Vlastimil Babka Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d879f1d8a44a..32cba0332787 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) case MPOL_INTERLEAVE: return !!nodes_equal(a->v.nodes, b->v.nodes); case MPOL_PREFERRED: + /* a's ->flags is the same as b's */ + if (a->flags & MPOL_F_LOCAL) + return true; return a->v.preferred_node == b->v.preferred_node; default: BUG(); -- cgit v1.2.3 From 296cefee07a20c89c8008c32ea0421ca64caf358 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 22 Mar 2018 16:17:05 -0700 Subject: MAINTAINERS: update Mark Fasheh's e-mail I'd like to use my personal e-mail for Ocfs2 requests and review. Link: http://lkml.kernel.org/r/20180311231356.9385-1-mfasheh@versity.com Signed-off-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4e62756936fa..73c0cdabf755 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10334,7 +10334,7 @@ F: drivers/oprofile/ F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) -M: Mark Fasheh +M: Mark Fasheh M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://ocfs2.wiki.kernel.org -- cgit v1.2.3 From 2e517d681632326ed98399cb4dd99519efe3e32c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 22 Mar 2018 16:17:10 -0700 Subject: lockdep: fix fs_reclaim warning Dave Jones reported fs_reclaim lockdep warnings. ============================================ WARNING: possible recursive locking detected 4.15.0-rc9-backup-debug+ #1 Not tainted -------------------------------------------- sshd/24800 is trying to acquire lock: (fs_reclaim){+.+.}, at: [<0000000084f438c2>] fs_reclaim_acquire.part.102+0x5/0x30 but task is already holding lock: (fs_reclaim){+.+.}, at: [<0000000084f438c2>] fs_reclaim_acquire.part.102+0x5/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(fs_reclaim); lock(fs_reclaim); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by sshd/24800: #0: (sk_lock-AF_INET6){+.+.}, at: [<000000001a069652>] tcp_sendmsg+0x19/0x40 #1: (fs_reclaim){+.+.}, at: [<0000000084f438c2>] fs_reclaim_acquire.part.102+0x5/0x30 stack backtrace: CPU: 3 PID: 24800 Comm: sshd Not tainted 4.15.0-rc9-backup-debug+ #1 Call Trace: dump_stack+0xbc/0x13f __lock_acquire+0xa09/0x2040 lock_acquire+0x12e/0x350 fs_reclaim_acquire.part.102+0x29/0x30 kmem_cache_alloc+0x3d/0x2c0 alloc_extent_state+0xa7/0x410 __clear_extent_bit+0x3ea/0x570 try_release_extent_mapping+0x21a/0x260 __btrfs_releasepage+0xb0/0x1c0 btrfs_releasepage+0x161/0x170 try_to_release_page+0x162/0x1c0 shrink_page_list+0x1d5a/0x2fb0 shrink_inactive_list+0x451/0x940 shrink_node_memcg.constprop.88+0x4c9/0x5e0 shrink_node+0x12d/0x260 try_to_free_pages+0x418/0xaf0 __alloc_pages_slowpath+0x976/0x1790 __alloc_pages_nodemask+0x52c/0x5c0 new_slab+0x374/0x3f0 ___slab_alloc.constprop.81+0x47e/0x5a0 __slab_alloc.constprop.80+0x32/0x60 __kmalloc_track_caller+0x267/0x310 __kmalloc_reserve.isra.40+0x29/0x80 __alloc_skb+0xee/0x390 sk_stream_alloc_skb+0xb8/0x340 tcp_sendmsg_locked+0x8e6/0x1d30 tcp_sendmsg+0x27/0x40 inet_sendmsg+0xd0/0x310 sock_write_iter+0x17a/0x240 __vfs_write+0x2ab/0x380 vfs_write+0xfb/0x260 SyS_write+0xb6/0x140 do_syscall_64+0x1e5/0xc05 entry_SYSCALL64_slow_path+0x25/0x25 This warning is caused by commit d92a8cfcb37e ("locking/lockdep: Rework FS_RECLAIM annotation") which replaced the use of lockdep_{set,clear}_current_reclaim_state() in __perform_reclaim() and lockdep_trace_alloc() in slab_pre_alloc_hook() with fs_reclaim_acquire()/ fs_reclaim_release(). Since __kmalloc_reserve() from __alloc_skb() adds __GFP_NOMEMALLOC | __GFP_NOWARN to gfp_mask, and all reclaim path simply propagates __GFP_NOMEMALLOC, fs_reclaim_acquire() in slab_pre_alloc_hook() is trying to grab the 'fake' lock again when __perform_reclaim() already grabbed the 'fake' lock. The /* this guy won't enter reclaim */ if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) return false; test which causes slab_pre_alloc_hook() to try to grab the 'fake' lock was added by commit cf40bd16fdad ("lockdep: annotate reclaim context (__GFP_NOFS)"). But that test is outdated because PF_MEMALLOC thread won't enter reclaim regardless of __GFP_NOMEMALLOC after commit 341ce06f69ab ("page allocator: calculate the alloc_flags for allocation only once") added the PF_MEMALLOC safeguard ( /* Avoid recursion of direct reclaim */ if (p->flags & PF_MEMALLOC) goto nopage; in __alloc_pages_slowpath()). Thus, let's fix outdated test by removing __GFP_NOMEMALLOC test and allow __need_fs_reclaim() to return false. Link: http://lkml.kernel.org/r/201802280650.FJC73911.FOSOMLJVFFQtHO@I-love.SAKURA.ne.jp Fixes: d92a8cfcb37ecd13 ("locking/lockdep: Rework FS_RECLAIM annotation") Signed-off-by: Tetsuo Handa Reported-by: Dave Jones Tested-by: Dave Jones Cc: Peter Zijlstra Cc: Nick Piggin Cc: Ingo Molnar Cc: Nikolay Borisov Cc: Michal Hocko Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 635d7dd29d7f..010dee0f089e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3596,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask) return false; /* this guy won't enter reclaim */ - if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) + if (current->flags & PF_MEMALLOC) return false; /* We're only interested __GFP_FS allocations for now */ -- cgit v1.2.3 From 63489f8e821144000e0bdca7e65a8d1cc23a7ee7 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 22 Mar 2018 16:17:13 -0700 Subject: hugetlbfs: check for pgoff value overflow A vma with vm_pgoff large enough to overflow a loff_t type when converted to a byte offset can be passed via the remap_file_pages system call. The hugetlbfs mmap routine uses the byte offset to calculate reservations and file size. A sequence such as: mmap(0x20a00000, 0x600000, 0, 0x66033, -1, 0); remap_file_pages(0x20a00000, 0x600000, 0, 0x20000000000000, 0); will result in the following when task exits/file closed, kernel BUG at mm/hugetlb.c:749! Call Trace: hugetlbfs_evict_inode+0x2f/0x40 evict+0xcb/0x190 __dentry_kill+0xcb/0x150 __fput+0x164/0x1e0 task_work_run+0x84/0xa0 exit_to_usermode_loop+0x7d/0x80 do_syscall_64+0x18b/0x190 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 The overflowed pgoff value causes hugetlbfs to try to set up a mapping with a negative range (end < start) that leaves invalid state which causes the BUG. The previous overflow fix to this code was incomplete and did not take the remap_file_pages system call into account. [mike.kravetz@oracle.com: v3] Link: http://lkml.kernel.org/r/20180309002726.7248-1-mike.kravetz@oracle.com [akpm@linux-foundation.org: include mmdebug.h] [akpm@linux-foundation.org: fix -ve left shift count on sh] Link: http://lkml.kernel.org/r/20180308210502.15952-1-mike.kravetz@oracle.com Fixes: 045c7a3f53d9 ("hugetlbfs: fix offset overflow in hugetlbfs mmap") Signed-off-by: Mike Kravetz Reported-by: Nic Losby Acked-by: Michal Hocko Cc: "Kirill A . Shutemov" Cc: Yisheng Xie Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 17 ++++++++++++++--- mm/hugetlb.c | 7 +++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8fe1b0aa2896..b9a254dcc0e7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); } +/* + * Mask used when checking the page offset value passed in via system + * calls. This value will be converted to a loff_t which is signed. + * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the + * value. The extra bit (- 1 in the shift value) is to take the sign + * bit into account. + */ +#define PGOFF_LOFFT_MAX \ + (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) + static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); @@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &hugetlb_vm_ops; /* - * Offset passed to mmap (before page shift) could have been - * negative when represented as a (l)off_t. + * page based offset in vm_pgoff could be sufficiently large to + * overflow a (l)off_t when converted to byte offset. */ - if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0) + if (vma->vm_pgoff & PGOFF_LOFFT_MAX) return -EINVAL; + /* must be huge page aligned */ if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a963f2034dfc..976bbc5646fe 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode, struct resv_map *resv_map; long gbl_reserve; + /* This should never happen */ + if (from > to) { + VM_WARN(1, "%s called with a negative range\n", __func__); + return -EINVAL; + } + /* * Only apply hugepage reservation if asked. At fault time, an * attempt will be made for VM_NORESERVE to allocate a page -- cgit v1.2.3 From 1705f7c534163594f8b05e060cb49fbea86ca70b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Mar 2018 16:17:17 -0700 Subject: h8300: remove extraneous __BIG_ENDIAN definition A bugfix I did earlier caused a build regression on h8300, which defines the __BIG_ENDIAN macro in a slightly different way than the generic code: arch/h8300/include/asm/byteorder.h:5:0: warning: "__BIG_ENDIAN" redefined We don't need to define it here, as the same macro is already provided by the linux/byteorder/big_endian.h, and that version does not conflict. While this is a v4.16 regression, my earlier patch also got backported to the 4.14 and 4.15 stable kernels, so we need the fixup there as well. Link: http://lkml.kernel.org/r/20180313120752.2645129-1-arnd@arndb.de Fixes: 101110f6271c ("Kbuild: always define endianess in kconfig.h") Signed-off-by: Arnd Bergmann Cc: Yoshinori Sato Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/h8300/include/asm/byteorder.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h index ecff2d1ca5a3..6eaa7ad5fc2c 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/asm/byteorder.h @@ -2,7 +2,6 @@ #ifndef __H8300_BYTEORDER_H__ #define __H8300_BYTEORDER_H__ -#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ #include #endif -- cgit v1.2.3 From b6bdb7517c3d3f41f20e5c2948d6bc3f8897394e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 22 Mar 2018 16:17:20 -0700 Subject: mm/vmalloc: add interfaces to free unmapped page table On architectures with CONFIG_HAVE_ARCH_HUGE_VMAP set, ioremap() may create pud/pmd mappings. A kernel panic was observed on arm64 systems with Cortex-A75 in the following steps as described by Hanjun Guo. 1. ioremap a 4K size, valid page table will build, 2. iounmap it, pte0 will set to 0; 3. ioremap the same address with 2M size, pgd/pmd is unchanged, then set the a new value for pmd; 4. pte0 is leaked; 5. CPU may meet exception because the old pmd is still in TLB, which will lead to kernel panic. This panic is not reproducible on x86. INVLPG, called from iounmap, purges all levels of entries associated with purged address on x86. x86 still has memory leak. The patch changes the ioremap path to free unmapped page table(s) since doing so in the unmap path has the following issues: - The iounmap() path is shared with vunmap(). Since vmap() only supports pte mappings, making vunmap() to free a pte page is an overhead for regular vmap users as they do not need a pte page freed up. - Checking if all entries in a pte page are cleared in the unmap path is racy, and serializing this check is expensive. - The unmap path calls free_vmap_area_noflush() to do lazy TLB purges. Clearing a pud/pmd entry before the lazy TLB purges needs extra TLB purge. Add two interfaces, pud_free_pmd_page() and pmd_free_pte_page(), which clear a given pud/pmd entry and free up a page for the lower level entries. This patch implements their stub functions on x86 and arm64, which work as workaround. [akpm@linux-foundation.org: fix typo in pmd_free_pte_page() stub] Link: http://lkml.kernel.org/r/20180314180155.19492-2-toshi.kani@hpe.com Fixes: e61ce6ade404e ("mm: change ioremap to set up huge I/O mappings") Reported-by: Lei Li Signed-off-by: Toshi Kani Cc: Catalin Marinas Cc: Wang Xuefeng Cc: Will Deacon Cc: Hanjun Guo Cc: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Matthew Wilcox Cc: Chintan Pandya Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 10 ++++++++++ arch/x86/mm/pgtable.c | 24 ++++++++++++++++++++++++ include/asm-generic/pgtable.h | 10 ++++++++++ lib/ioremap.c | 6 ++++-- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8c704f1e53c2..2dbb2c9f1ec1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp) pmd_clear(pmdp); return 1; } + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 004abf9ebf12..1eed7ed518e6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,4 +702,28 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } + +/** + * pud_free_pmd_page - Clear pud entry and free pmd page. + * @pud: Pointer to a PUD. + * + * Context: The pud range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +/** + * pmd_free_pte_page - Clear pmd entry and free pte page. + * @pmd: Pointer to a PMD. + * + * Context: The pmd range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2cfa3075d148..bfbb44a5ad38 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud); +int pmd_free_pte_page(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { @@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } +static inline int pud_free_pmd_page(pud_t *pud) +{ + return 0; +} +static inline int pmd_free_pte_page(pmd_t *pmd) +{ + return 0; +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE diff --git a/lib/ioremap.c b/lib/ioremap.c index b808a390e4c3..54e5bbaa3200 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && - IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PMD_SIZE) && + pmd_free_pte_page(pmd)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && - IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PUD_SIZE) && + pud_free_pmd_page(pud)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } -- cgit v1.2.3 From 28ee90fe6048fa7b7ceaeb8831c0e4e454a4cf89 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 22 Mar 2018 16:17:24 -0700 Subject: x86/mm: implement free pmd/pte page interfaces Implement pud_free_pmd_page() and pmd_free_pte_page() on x86, which clear a given pud/pmd entry and free up lower level page table(s). The address range associated with the pud/pmd entry must have been purged by INVLPG. Link: http://lkml.kernel.org/r/20180314180155.19492-3-toshi.kani@hpe.com Fixes: e61ce6ade404e ("mm: change ioremap to set up huge I/O mappings") Signed-off-by: Toshi Kani Reported-by: Lei Li Cc: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1eed7ed518e6..34cda7e0551b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -712,7 +712,22 @@ int pmd_clear_huge(pmd_t *pmd) */ int pud_free_pmd_page(pud_t *pud) { - return pud_none(*pud); + pmd_t *pmd; + int i; + + if (pud_none(*pud)) + return 1; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_free_pte_page(&pmd[i])) + return 0; + + pud_clear(pud); + free_page((unsigned long)pmd); + + return 1; } /** @@ -724,6 +739,15 @@ int pud_free_pmd_page(pud_t *pud) */ int pmd_free_pte_page(pmd_t *pmd) { - return pmd_none(*pmd); + pte_t *pte; + + if (pmd_none(*pmd)) + return 1; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + free_page((unsigned long)pte); + + return 1; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -- cgit v1.2.3 From fece2029a9e65b9a990831afe2a2b83290cbbe26 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:28 -0700 Subject: mm/khugepaged.c: convert VM_BUG_ON() to collapse fail khugepaged is not yet able to convert PTE-mapped huge pages back to PMD mapped. We do not collapse such pages. See check khugepaged_scan_pmd(). But if between khugepaged_scan_pmd() and __collapse_huge_page_isolate() somebody managed to instantiate THP in the range and then split the PMD back to PTEs we would have a problem -- VM_BUG_ON_PAGE(PageCompound(page)) will get triggered. It's possible since we drop mmap_sem during collapse to re-take for write. Replace the VM_BUG_ON() with graceful collapse fail. Link: http://lkml.kernel.org/r/20180315152353.27989-1-kirill.shutemov@linux.intel.com Fixes: b1caa957ae6d ("khugepaged: ignore pmd tables with THP mapped with ptes") Signed-off-by: Kirill A. Shutemov Cc: Laura Abbott Cc: Jerome Marchand Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b7e2268dfc9a..c15da1ea7e63 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } - VM_BUG_ON_PAGE(PageCompound(page), page); + /* TODO: teach khugepaged to collapse THP mapped with pte */ + if (PageCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out; + } + VM_BUG_ON_PAGE(!PageAnon(page), page); /* -- cgit v1.2.3 From fa41b900c30b45fab03783724932dc30cd46a6be Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:31 -0700 Subject: mm/thp: do not wait for lock_page() in deferred_split_scan() deferred_split_scan() gets called from reclaim path. Waiting for page lock may lead to deadlock there. Replace lock_page() with trylock_page() and skip the page if we failed to lock it. We will get to the page on the next scan. Link: http://lkml.kernel.org/r/20180315150747.31945-1-kirill.shutemov@linux.intel.com Fixes: 9a982250f773 ("thp: introduce deferred_split_huge_page()") Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 87ab9b8f56b5..529cf36b7edb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2783,11 +2783,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, list_for_each_safe(pos, next, &list) { page = list_entry((void *)pos, struct page, mapping); - lock_page(page); + if (!trylock_page(page)) + goto next; /* split_huge_page() removes page from list on success */ if (!split_huge_page(page)) split++; unlock_page(page); +next: put_page(page); } -- cgit v1.2.3 From b3cd54b257ad95d344d121dc563d943ca39b0921 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:35 -0700 Subject: mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink() shmem_unused_huge_shrink() gets called from reclaim path. Waiting for page lock may lead to deadlock there. There was a bug report that may be attributed to this: http://lkml.kernel.org/r/alpine.LRH.2.11.1801242349220.30642@mail.ewheeler.net Replace lock_page() with trylock_page() and skip the page if we failed to lock it. We will get to the page on the next scan. We can test for the PageTransHuge() outside the page lock as we only need protection against splitting the page under us. Holding pin oni the page is enough for this. Link: http://lkml.kernel.org/r/20180316210830.43738-1-kirill.shutemov@linux.intel.com Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure") Signed-off-by: Kirill A. Shutemov Reported-by: Eric Wheeler Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Tetsuo Handa Cc: Hugh Dickins Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 1907688b75ee..b85919243399 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -493,36 +493,45 @@ next: info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; - if (nr_to_split && split >= nr_to_split) { - iput(inode); - continue; - } + if (nr_to_split && split >= nr_to_split) + goto leave; - page = find_lock_page(inode->i_mapping, + page = find_get_page(inode->i_mapping, (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); if (!page) goto drop; + /* No huge page at the end of the file: nothing to split */ if (!PageTransHuge(page)) { - unlock_page(page); put_page(page); goto drop; } + /* + * Leave the inode on the list if we failed to lock + * the page at this time. + * + * Waiting for the lock may lead to deadlock in the + * reclaim path. + */ + if (!trylock_page(page)) { + put_page(page); + goto leave; + } + ret = split_huge_page(page); unlock_page(page); put_page(page); - if (ret) { - /* split failed: leave it on the list */ - iput(inode); - continue; - } + /* If split failed leave the inode on the list */ + if (ret) + goto leave; split++; drop: list_del_init(&info->shrinklist); removed++; +leave: iput(inode); } -- cgit v1.2.3 From f59f1caf72ba00d519c793c3deb32cd3be32edc2 Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 22 Mar 2018 16:17:38 -0700 Subject: Revert "mm: page_alloc: skip over regions of invalid pfns where possible" This reverts commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible"). The commit is meant to be a boot init speed up skipping the loop in memmap_init_zone() for invalid pfns. But given some specific memory mapping on x86_64 (or more generally theoretically anywhere but on arm with CONFIG_HAVE_ARCH_PFN_VALID) the implementation also skips valid pfns which is plain wrong and causes 'kernel BUG at mm/page_alloc.c:1389!' crash> log | grep -e BUG -e RIP -e Call.Trace -e move_freepages_block -e rmqueue -e freelist -A1 kernel BUG at mm/page_alloc.c:1389! invalid opcode: 0000 [#1] SMP -- RIP: 0010: move_freepages+0x15e/0x160 -- Call Trace: move_freepages_block+0x73/0x80 __rmqueue+0x263/0x460 get_page_from_freelist+0x7e1/0x9e0 __alloc_pages_nodemask+0x176/0x420 -- crash> page_init_bug -v | grep RAM 1000 - 9bfff System RAM (620.00 KiB) 100000 - 430bffff System RAM ( 1.05 GiB = 1071.75 MiB = 1097472.00 KiB) 4b0c8000 - 4bf9cfff System RAM ( 14.83 MiB = 15188.00 KiB) 4bfac000 - 646b1fff System RAM (391.02 MiB = 400408.00 KiB) 7b788000 - 7b7fffff System RAM (480.00 KiB) 100000000 - 67fffffff System RAM ( 22.00 GiB) crash> page_init_bug | head -6 7b788000 - 7b7fffff System RAM (480.00 KiB) 1fffff00000000 0 1 DMA32 4096 1048575 505736 505344 505855 0 0 0 DMA 1 4095 1fffff00000400 0 1 DMA32 4096 1048575 BUG, zones differ! crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b787000 7b788000 PAGE PHYSICAL MAPPING INDEX CNT FLAGS ffffea0001e00000 78000000 0 0 0 0 ffffea0001ed7fc0 7b5ff000 0 0 0 0 ffffea0001ed8000 7b600000 0 0 0 0 <<<< ffffea0001ede1c0 7b787000 0 0 0 0 ffffea0001ede200 7b788000 0 0 1 1fffff00000000 Link: http://lkml.kernel.org/r/20180316143855.29838-1-neelx@redhat.com Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") Signed-off-by: Daniel Vacek Acked-by: Ard Biesheuvel Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Mel Gorman Cc: Pavel Tatashin Cc: Paul Burton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 - mm/memblock.c | 28 ---------------------------- mm/page_alloc.c | 11 +---------- 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8be5077efb5f..f92ea7783652 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); -unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); /** * for_each_mem_pfn_range - early memory pfn range iterator diff --git a/mm/memblock.c b/mm/memblock.c index b6ba6b7adadc..48376bd33274 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, *out_nid = r->nid; } -unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, - unsigned long max_pfn) -{ - struct memblock_type *type = &memblock.memory; - unsigned int right = type->cnt; - unsigned int mid, left = 0; - phys_addr_t addr = PFN_PHYS(++pfn); - - do { - mid = (right + left) / 2; - - if (addr < type->regions[mid].base) - right = mid; - else if (addr >= (type->regions[mid].base + - type->regions[mid].size)) - left = mid + 1; - else { - /* addr is within the region, so pfn is valid */ - return pfn; - } - } while (left < right); - - if (right == type->cnt) - return -1UL; - else - return PHYS_PFN(type->regions[right].base); -} - /** * memblock_set_node - set node ID on memblock regions * @base: base of area to set node ID for diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 010dee0f089e..1741dd23e7c1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5356,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context != MEMMAP_EARLY) goto not_early; - if (!early_pfn_valid(pfn)) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - /* - * Skip to the pfn preceding the next valid one (or - * end_pfn), such that we hit a valid pfn (or end_pfn) - * on our next iteration of the loop. - */ - pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; -#endif + if (!early_pfn_valid(pfn)) continue; - } if (!early_pfn_in_nid(pfn, nid)) continue; if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) -- cgit v1.2.3 From 1c610d5f93c709df56787f50b3576704ac271826 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 22 Mar 2018 16:17:42 -0700 Subject: mm/vmscan: wake up flushers for legacy cgroups too Commit 726d061fbd36 ("mm: vmscan: kick flushers when we encounter dirty pages on the LRU") added flusher invocation to shrink_inactive_list() when many dirty pages on the LRU are encountered. However, shrink_inactive_list() doesn't wake up flushers for legacy cgroup reclaim, so the next commit bbef938429f5 ("mm: vmscan: remove old flusher wakeup from direct reclaim path") removed the only source of flusher's wake up in legacy mem cgroup reclaim path. This leads to premature OOM if there is too many dirty pages in cgroup: # mkdir /sys/fs/cgroup/memory/test # echo $$ > /sys/fs/cgroup/memory/test/tasks # echo 50M > /sys/fs/cgroup/memory/test/memory.limit_in_bytes # dd if=/dev/zero of=tmp_file bs=1M count=100 Killed dd invoked oom-killer: gfp_mask=0x14000c0(GFP_KERNEL), nodemask=(null), order=0, oom_score_adj=0 Call Trace: dump_stack+0x46/0x65 dump_header+0x6b/0x2ac oom_kill_process+0x21c/0x4a0 out_of_memory+0x2a5/0x4b0 mem_cgroup_out_of_memory+0x3b/0x60 mem_cgroup_oom_synchronize+0x2ed/0x330 pagefault_out_of_memory+0x24/0x54 __do_page_fault+0x521/0x540 page_fault+0x45/0x50 Task in /test killed as a result of limit of /test memory: usage 51200kB, limit 51200kB, failcnt 73 memory+swap: usage 51200kB, limit 9007199254740988kB, failcnt 0 kmem: usage 296kB, limit 9007199254740988kB, failcnt 0 Memory cgroup stats for /test: cache:49632KB rss:1056KB rss_huge:0KB shmem:0KB mapped_file:0KB dirty:49500KB writeback:0KB swap:0KB inactive_anon:0KB active_anon:1168KB inactive_file:24760KB active_file:24960KB unevictable:0KB Memory cgroup out of memory: Kill process 3861 (bash) score 88 or sacrifice child Killed process 3876 (dd) total-vm:8484kB, anon-rss:1052kB, file-rss:1720kB, shmem-rss:0kB oom_reaper: reaped process 3876 (dd), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB Wake up flushers in legacy cgroup reclaim too. Link: http://lkml.kernel.org/r/20180315164553.17856-1-aryabinin@virtuozzo.com Fixes: bbef938429f5 ("mm: vmscan: remove old flusher wakeup from direct reclaim path") Signed-off-by: Andrey Ryabinin Tested-by: Shakeel Butt Acked-by: Michal Hocko Cc: Mel Gorman Cc: Tejun Heo Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index bee53495a829..cd5dc3faaa57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1779,6 +1779,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (stat.nr_writeback && stat.nr_writeback == nr_taken) set_bit(PGDAT_WRITEBACK, &pgdat->flags); + /* + * If dirty pages are scanned that are not queued for IO, it + * implies that flushers are not doing their job. This can + * happen when memory pressure pushes dirty pages to the end of + * the LRU before the dirty limits are breached and the dirty + * data has expired. It can also happen when the proportion of + * dirty pages grows not through writes but through memory + * pressure reclaiming all the clean cache. And in some cases, + * the flushers simply cannot keep up with the allocation + * rate. Nudge the flusher threads in case they are asleep. + */ + if (stat.nr_unqueued_dirty == nr_taken) + wakeup_flusher_threads(WB_REASON_VMSCAN); + /* * Legacy memcg will stall in page writeback so avoid forcibly * stalling here. @@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested) set_bit(PGDAT_CONGESTED, &pgdat->flags); - /* - * If dirty pages are scanned that are not queued for IO, it - * implies that flushers are not doing their job. This can - * happen when memory pressure pushes dirty pages to the end of - * the LRU before the dirty limits are breached and the dirty - * data has expired. It can also happen when the proportion of - * dirty pages grows not through writes but through memory - * pressure reclaiming all the clean cache. And in some cases, - * the flushers simply cannot keep up with the allocation - * rate. Nudge the flusher threads in case they are asleep, but - * also allow kswapd to start writing pages during reclaim. - */ - if (stat.nr_unqueued_dirty == nr_taken) { - wakeup_flusher_threads(WB_REASON_VMSCAN); + /* Allow kswapd to start writing pages during reclaim. */ + if (stat.nr_unqueued_dirty == nr_taken) set_bit(PGDAT_DIRTY, &pgdat->flags); - } /* * If kswapd scans pages marked marked for immediate -- cgit v1.2.3 From 9d3c3354bb85bab4d865fe95039443f09a4c8394 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 22 Mar 2018 16:17:45 -0700 Subject: mm, thp: do not cause memcg oom for thp Commit 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations") changed the page allocator to no longer detect thp allocations based on __GFP_NORETRY. It did not, however, modify the mem cgroup try_charge() path to avoid oom kill for either khugepaged collapsing or thp faulting. It is never expected to oom kill a process to allocate a hugepage for thp; reclaim is governed by the thp defrag mode and MADV_HUGEPAGE, but allocations (and charging) should fallback instead of oom killing processes. Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803191409420.124411@chino.kir.corp.google.com Fixes: 2516035499b9 ("mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations") Signed-off-by: David Rientjes Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Vlastimil Babka Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 5 +++-- mm/khugepaged.c | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 529cf36b7edb..5a68730eebd6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg, + true)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1316,7 +1317,7 @@ alloc: } if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, - huge_gfp, &memcg, true))) { + huge_gfp | __GFP_NORETRY, &memcg, true))) { put_page(new_page); split_huge_pmd(vma, vmf->pmd, vmf->address); if (page) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c15da1ea7e63..e42568284e06 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -965,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1324,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } -- cgit v1.2.3 From aff6f8cb3e2170b9e58b0932bce7bfb492775e23 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 23 Mar 2018 09:29:05 +1100 Subject: powerpc/mm: Add tracking of the number of coprocessors using a context Currently, when using coprocessors (which use the Nest MMU), we simply increment the active_cpu count to force all TLB invalidations to be come broadcast. Unfortunately, due to an errata in POWER9, we will need to know more specifically that coprocessors are in use. This maintains a separate copros counter in the MMU context for that purpose. NB. The commit mentioned in the fixes tag below is not at fault for the bug we're fixing in this commit and the next, but this fix applies on top the infrastructure it introduced. Fixes: 03b8abedf4f4 ("cxl: Enable global TLBIs for cxl contexts") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Benjamin Herrenschmidt Tested-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 +++ arch/powerpc/include/asm/mmu_context.h | 18 +++++++++++++----- arch/powerpc/mm/mmu_context_book3s64.c | 1 + 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 0abeb0e2d616..37671feb2bf6 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -87,6 +87,9 @@ typedef struct { /* Number of bits in the mm_cpumask */ atomic_t active_cpus; + /* Number of users of the external (Nest) MMU */ + atomic_t copros; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 051b3d63afe3..3a15b6db9501 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -92,15 +92,23 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm) static inline void mm_context_add_copro(struct mm_struct *mm) { /* - * On hash, should only be called once over the lifetime of - * the context, as we can't decrement the active cpus count - * and flush properly for the time being. + * If any copro is in use, increment the active CPU count + * in order to force TLB invalidations to be global as to + * propagate to the Nest MMU. */ - inc_mm_active_cpus(mm); + if (atomic_inc_return(&mm->context.copros) == 1) + inc_mm_active_cpus(mm); } static inline void mm_context_remove_copro(struct mm_struct *mm) { + int c; + + c = atomic_dec_if_positive(&mm->context.copros); + + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + /* * Need to broadcast a global flush of the full mm before * decrementing active_cpus count, as the next TLBI may be @@ -111,7 +119,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * for the time being. Invalidations will remain global if * used on hash. */ - if (radix_enabled()) { + if (c == 0 && radix_enabled()) { flush_all_mm(mm); dec_mm_active_cpus(mm); } diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 929d9ef7083f..3f980baade4c 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -173,6 +173,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm_iommu_init(mm); #endif atomic_set(&mm->context.active_cpus, 0); + atomic_set(&mm->context.copros, 0); return 0; } -- cgit v1.2.3 From 80a4ae202f2d319eced8bbf612a4e8b0f11c21f5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 23 Mar 2018 09:29:06 +1100 Subject: powerpc/mm: Workaround Nest MMU bug with TLB invalidations On POWER9 the Nest MMU may fail to invalidate some translations when doing a tlbie "by PID" or "by LPID" that is targeted at the TLB only and not the page walk cache. This works around it by forcing such invalidations to escalate to RIC=2 (full invalidation of TLB *and* PWC) when a coprocessor is in use for the context. Fixes: 03b8abedf4f4 ("cxl: Enable global TLBIs for cxl contexts") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Balbir Singh [balbirs: fixed spelling and coding style to quiesce checkpatch.pl] Tested-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 50 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71d1b19ad1c0..8e4c6cb4a808 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -151,7 +151,23 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { asm volatile("ptesync": : :"memory"); - __tlbie_pid(pid, ric); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + } asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -311,6 +327,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_needs_flush_escalation(struct mm_struct *mm) +{ + /* + * P9 nest MMU has issues with the page walk cache + * caching PTEs and not flushing them properly when + * RIC = 0 for a PID/LPID invalidate + */ + return atomic_read(&mm->context.copros) != 0; +} + #ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { @@ -321,9 +347,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm) return; preempt_disable(); - if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_TLB); - else + if (!mm_is_thread_local(mm)) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } @@ -435,10 +464,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } if (full) { - if (local) + if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } } else { bool hflush = false; unsigned long hstart, hend; @@ -548,6 +581,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, } if (full) { + if (!local && mm_needs_flush_escalation(mm)) + also_pwc = true; + if (local) _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); else -- cgit v1.2.3 From 99491e2d0e50c53a0620dfec340b249d08f5968f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:25 +0530 Subject: powerpc/mm/radix: Remove unused code These function are not used in the code. Remove them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 3 -- arch/powerpc/mm/tlb-radix.c | 40 ---------------------- 2 files changed, 43 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8eea90f80e45..19b45ba6caf9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -47,9 +47,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); -extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size); -extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 8e4c6cb4a808..e47ee8c867c5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -639,46 +639,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size) -{ - unsigned long rb,rs,prs,r; - unsigned long ap; - unsigned long ric = RIC_FLUSH_TLB; - - ap = mmu_get_ap(radix_get_mmu_psize(page_size)); - rb = gpa & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid_va); - -void radix__flush_tlb_lpid(unsigned long lpid) -{ - unsigned long rb,rs,prs,r; - unsigned long ric = RIC_FLUSH_ALL; - - rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* partition scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid); - void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 243fee3249ff78e5f7ab822139dc89719def82d2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:26 +0530 Subject: powerpc/mm/radix: Move the functions that does the actual tlbie closer No functionality change. Just code movement to ease code changes later Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 64 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index e47ee8c867c5..74354c26d316 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -119,6 +119,38 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -171,22 +203,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 1, rb, rs, ric, prs, r); -} - static inline void __tlbiel_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -219,22 +235,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, asm volatile("ptesync": : :"memory"); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static inline void __tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) -- cgit v1.2.3 From a5d4b5891c2f1f865a2def1eb0030f534e77ff86 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:27 +0530 Subject: powerpc/mm: Fixup tlbie vs store ordering issue on POWER9 On POWER9, under some circumstances, a broadcast TLB invalidation might complete before all previous stores have drained, potentially allowing stale stores from becoming visible after the invalidation. This works around it by doubling up those TLB invalidations which was verified by HW to be sufficient to close the risk window. This will be documented in a yet-to-be-published errata. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V [mpe: Enable the feature in the DT CPU features code for all Power9, rename the feature to CPU_FTR_P9_TLBIE_BUG per benh.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 3 ++- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 +++ arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 +++++++++++ arch/powerpc/mm/hash_native_64.c | 16 +++++++++++++++- arch/powerpc/mm/pgtable_64.c | 1 + arch/powerpc/mm/tlb-radix.c | 15 +++++++++++++++ 7 files changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a2c5c95882cf..2e2bacbdf6ed 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -203,6 +203,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) @@ -465,7 +466,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_PKEY) + CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 0bcfb0f256e1..8ca5d5b74618 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -709,6 +709,9 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + + if ((version & 0xffff0000) == 0x004e0000) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c854816e653..0837b9738d76 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8888e625a999..e1c083fbe434 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], kvm->arch.lpid, 0, 0, 0); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a0675e91ad7d..656933c85925 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -278,6 +287,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -771,7 +781,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -843,6 +853,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 28c980eb4422..adf469f312f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -481,6 +481,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 74354c26d316..a07f5372a4bf 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -151,6 +151,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -200,6 +211,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) default: __tlbie_pid(pid, RIC_FLUSH_ALL); } + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -253,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -264,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbie_pid(pid, RIC_FLUSH_PWC); __tlbie_va_range(start, end, pid, page_size, psize); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -498,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (hflush) __tlbie_va_range(hstart, hend, pid, HPAGE_PMD_SIZE, MMU_PAGE_2M); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } -- cgit v1.2.3 From c5d343b6b7badd1f5fe0873eff2e8d63a193e732 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:38:10 +0900 Subject: tracing: probeevent: Fix to support minus offset from symbol In Documentation/trace/kprobetrace.txt, it says @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) However, the parser doesn't parse minus offset correctly, since commit 2fba0c8867af ("tracing/kprobes: Fix probe offset to be unsigned") drops minus ("-") offset support for kprobe probe address usage. This fixes the traceprobe_split_symbol_offset() to parse minus offset again with checking the offset range, and add a minus offset check in kprobe probe address usage. Link: http://lkml.kernel.org/r/152129028983.31874.13419301530285775521.stgit@devbox Cc: Ingo Molnar Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Ravi Bangoria Cc: stable@vger.kernel.org Fixes: 2fba0c8867af ("tracing/kprobes: Fix probe offset to be unsigned") Acked-by: Namhyung Kim Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_probe.c | 8 +++----- kernel/trace/trace_probe.h | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1fad24acd444..ae4147eaebd4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -659,7 +659,7 @@ static int create_trace_kprobe(int argc, char **argv) char *symbol = NULL, *event = NULL, *group = NULL; int maxactive = 0; char *arg; - unsigned long offset = 0; + long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -747,7 +747,7 @@ static int create_trace_kprobe(int argc, char **argv) symbol = argv[1]; /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); - if (ret) { + if (ret || offset < 0 || offset > UINT_MAX) { pr_info("Failed to parse either an address or a symbol.\n"); return ret; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d59357308677..daf54bda4dc8 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -320,7 +320,7 @@ static fetch_func_t get_fetch_size_function(const struct fetch_type *type, } /* Split symbol and offset. */ -int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) +int traceprobe_split_symbol_offset(char *symbol, long *offset) { char *tmp; int ret; @@ -328,13 +328,11 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) if (!offset) return -EINVAL; - tmp = strchr(symbol, '+'); + tmp = strpbrk(symbol, "+-"); if (tmp) { - /* skip sign because kstrtoul doesn't accept '+' */ - ret = kstrtoul(tmp + 1, 0, offset); + ret = kstrtol(tmp, 0, offset); if (ret) return ret; - *tmp = '\0'; } else *offset = 0; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index e101c5bb9eda..6a4d3fa94042 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -365,7 +365,7 @@ extern int traceprobe_conflict_field_name(const char *name, extern void traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); -extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset); +extern int traceprobe_split_symbol_offset(char *symbol, long *offset); /* Sum up total data length for dynamic arraies (strings) */ static nokprobe_inline int -- cgit v1.2.3 From 871bef2000968c312a4000b2f56d370dcedbc93c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:38:56 +0900 Subject: selftests: ftrace: Add probe event argument syntax testcase Add a testcase for probe event argument syntax which ensures the kprobe_events interface correctly parses given event arguments. Link: http://lkml.kernel.org/r/152129033679.31874.12705519603869152799.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_args_syntax.tc | 97 ++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc new file mode 100644 index 000000000000..231bcd2c4eb5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -0,0 +1,97 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event argument syntax + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue + +echo 0 > events/enable +echo > kprobe_events + +PROBEFUNC="vfs_read" +GOODREG= +BADREG= +GOODSYM="_sdata" +if ! grep -qw ${GOODSYM} /proc/kallsyms ; then + GOODSYM=$PROBEFUNC +fi +BADSYM="deaqswdefr" +SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "` +GOODTYPE="x16" +BADTYPE="y16" + +case `uname -m` in +x86_64|i[3456]86) + GOODREG=%ax + BADREG=%ex +;; +aarch64) + GOODREG=%x0 + BADREG=%ax +;; +arm*) + GOODREG=%r0 + BADREG=%ax +;; +esac + +test_goodarg() # Good-args +{ + while [ "$1" ]; do + echo "p ${PROBEFUNC} $1" > kprobe_events + shift 1 + done; +} + +test_badarg() # Bad-args +{ + while [ "$1" ]; do + ! echo "p ${PROBEFUNC} $1" > kprobe_events + shift 1 + done; +} + +echo > kprobe_events + +: "Register access" +test_goodarg ${GOODREG} +test_badarg ${BADREG} + +: "Symbol access" +test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10" +test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \ + "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10" + +: "Stack access" +test_goodarg "\$stack" "\$stack0" "\$stack1" +test_badarg "\$stackp" "\$stack0+10" "\$stack1-10" + +: "Retval access" +echo "r ${PROBEFUNC} \$retval" > kprobe_events +! echo "p ${PROBEFUNC} \$retval" > kprobe_events + +: "Comm access" +test_goodarg "\$comm" + +: "Indirect memory access" +test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \ + "+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))" +test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \ + "+10(\$comm)" "+0(${GOODREG})+10" + +: "Name assignment" +test_goodarg "varname=${GOODREG}" +test_badarg "varname=varname2=${GOODREG}" + +: "Type syntax" +test_goodarg "${GOODREG}:${GOODTYPE}" +test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \ + "${GOODTYPE}:${GOODREG}" + +: "Combination check" + +test_goodarg "\$comm:string" "+0(\$stack):string" +test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string" + +echo > kprobe_events -- cgit v1.2.3 From 5fbdbed797b6d12d043a5121fdbc8d8b49d10e80 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:39:44 +0900 Subject: selftests: ftrace: Add a testcase for string type with kprobe_event Add a testcase for string type with kprobe event. This tests good/bad syntax combinations and also the traced data is correct in several way. Link: http://lkml.kernel.org/r/152129038381.31874.9201387794548737554.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_args_string.tc | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc new file mode 100644 index 000000000000..5ba73035e1d9 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event string type argument + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +echo 0 > events/enable +echo > kprobe_events + +case `uname -m` in +x86_64) + ARG2=%si + OFFS=8 +;; +i[3456]86) + ARG2=%cx + OFFS=4 +;; +aarch64) + ARG2=%x1 + OFFS=8 +;; +arm*) + ARG2=%r1 + OFFS=4 +;; +*) + echo "Please implement other architecture here" + exit_untested +esac + +: "Test get argument (1)" +echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +! echo test >> kprobe_events +tail -n 1 trace | grep -qe "testprobe.* arg1=\"test\"" + +echo 0 > events/kprobes/testprobe/enable +: "Test get argument (2)" +echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string arg2=+0(+${OFFS}(${ARG2})):string" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +! echo test1 test2 >> kprobe_events +tail -n 1 trace | grep -qe "testprobe.* arg1=\"test1\" arg2=\"test2\"" + +echo 0 > events/enable +echo > kprobe_events -- cgit v1.2.3 From dfa453bc90eca0febff33c8d292a656e53702158 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:40:31 +0900 Subject: selftests: ftrace: Add a testcase for probepoint Add a testcase for probe point definition. This tests symbol, address and symbol+offset syntax. The offset must be positive and smaller than UINT_MAX. Link: http://lkml.kernel.org/r/152129043097.31874.14273580606301767394.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../selftests/ftrace/test.d/kprobe/probepoint.tc | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc new file mode 100644 index 000000000000..4fda01a08da4 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc @@ -0,0 +1,43 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe events - probe points + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +TARGET_FUNC=create_trace_kprobe + +dec_addr() { # hexaddr + printf "%d" "0x"`echo $1 | tail -c 8` +} + +set_offs() { # prev target next + A1=`dec_addr $1` + A2=`dec_addr $2` + A3=`dec_addr $3` + TARGET="0x$2" # an address + PREV=`expr $A1 - $A2` # offset to previous symbol + NEXT=+`expr $A3 - $A2` # offset to next symbol + OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol +} + +# We have to decode symbol addresses to get correct offsets. +# If the offset is not an instruction boundary, it cause -EILSEQ. +set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs` + +UINT_TEST=no +# printf "%x" -1 returns (unsigned long)-1. +if [ `printf "%x" -1 | wc -c` != 9 ]; then + UINT_TEST=yes +fi + +echo 0 > events/enable +echo > kprobe_events +echo "p:testprobe ${TARGET_FUNC}" > kprobe_events +echo "p:testprobe ${TARGET}" > kprobe_events +echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events +! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events +if [ "${UINT_TEST}" = yes ]; then +! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events +fi +echo > kprobe_events +clear_trace -- cgit v1.2.3 From 06ace26f4e6fcf747e890a39193be811777a048a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 22 Mar 2018 15:18:53 -0400 Subject: x86/efi: Free efi_pgd with free_pages() The efi_pgd is allocated as PGD_ALLOCATION_ORDER pages and therefore must also be freed as PGD_ALLOCATION_ORDER pages with free_pages(). Fixes: d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: Dave Hansen Cc: Ard Biesheuvel Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1521746333-19593-1-git-send-email-longman@redhat.com --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c310a8284358..f9cfbc0d1f33 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -227,7 +227,7 @@ int __init efi_alloc_page_tables(void) if (!pud) { if (CONFIG_PGTABLE_LEVELS > 4) free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_page((unsigned long)efi_pgd); + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); return -ENOMEM; } -- cgit v1.2.3 From c70a36268799cf2f902b5a31e452571fcb96bfe9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 23 Mar 2018 15:30:33 -0400 Subject: drm/amdkfd: Fix scratch memory with HWS enabled Program sh_hidden_private_base_vmid correctly in the map-process PM4 packet. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 0ecbd1f9b606..0c3bc00978f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -188,8 +188,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; - /* TODO: scratch support */ - packet->sh_hidden_private_base_vmid = 0; + packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); -- cgit v1.2.3 From 72a01d231dcb22a276209c7a924a8cd475fbaa9b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 23 Mar 2018 15:30:34 -0400 Subject: drm/amdkfd: Deallocate SDMA queues correctly Deallocate SDMA queues during abnormal process termination and when queue creation fails after the SDMA allocation. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b21285afa4ea..1bd5f26b3f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -821,13 +821,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out; + goto out_unlock; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out; + goto out_unlock; q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = @@ -838,7 +838,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (!mqd) { retval = -ENOMEM; - goto out; + goto out_deallocate_sdma_queue; } dqm->asic_ops.init_sdma_vm(dqm, q, qpd); @@ -848,7 +848,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out; + goto out_deallocate_sdma_queue; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -869,7 +869,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); -out: + mutex_unlock(&dqm->lock); + return retval; + +out_deallocate_sdma_queue: + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + deallocate_sdma_queue(dqm, q->sdma_id); +out_unlock: mutex_unlock(&dqm->lock); return retval; } @@ -1188,8 +1194,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* Clear all user mode queues */ list_for_each_entry(q, &qpd->queues_list, list) { - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } if (q->properties.is_active) dqm->queue_count--; -- cgit v1.2.3 From d8ba61ba58c88d5207c1ba2f7d9a2280e7d03be9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 23 Jul 2015 15:37:48 -0700 Subject: x86/entry/64: Don't use IST entry for #BP stack There's nothing IST-worthy about #BP/int3. We don't allow kprobes in the small handful of places in the kernel that run at CPL0 with an invalid stack, and 32-bit kernels have used normal interrupt gates for #BP forever. Furthermore, we don't allow kprobes in places that have usergs while in kernel mode, so "paranoid" is also unnecessary. Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/idt.c | 2 -- arch/x86/kernel/traps.c | 15 ++++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d5c7f18f79ac..9b114675fbc0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1138,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 56d99be3706a..50bee5fe1140 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -160,7 +160,6 @@ static const __initconst struct idt_data early_pf_idts[] = { */ static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_BP, int3), }; #endif @@ -183,7 +182,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3d9b2308e7fa..03f3d7695dac 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. + */ ist_enter(regs); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - debug_stack_usage_dec(); + exit: ist_exit(regs); } -- cgit v1.2.3 From 1328f02005bbbaed15b9d5b7f3ab5ec9d4d5268a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 22 Jan 2018 12:20:26 +0100 Subject: ARM: 8746/1: vfp: Go back to clearing vfp_current_hw_state[] Commit 384b38b66947 ("ARM: 7873/1: vfp: clear vfp_current_hw_state for dying cpu") fixed the cpu dying notifier by clearing vfp_current_hw_state[]. However commit e5b61bafe704 ("arm: Convert VFP hotplug notifiers to state machine") incorrectly used the original vfp_force_reload() function in the cpu dying notifier. Fix it by going back to clearing vfp_current_hw_state[]. Fixes: e5b61bafe704 ("arm: Convert VFP hotplug notifiers to state machine") Cc: linux-stable Reported-by: Kohji Okuno Signed-off-by: Fabio Estevam Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 03c6a3c72f9c..4c375e11ae95 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -648,7 +648,7 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, */ static int vfp_dying_cpu(unsigned int cpu) { - vfp_force_reload(cpu, current_thread_info()); + vfp_current_hw_state[cpu] = NULL; return 0; } -- cgit v1.2.3 From ad43fc9aa24e9fc9ad780faad62fd78a09da839c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2018 14:56:07 +0100 Subject: ARM: 8747/1: make CONFIG_DEBUG_WX depend on MMU Without CONFIG_MMU, this results in a build failure: ./arch/arm/include/asm/memory.h:92:23: error: initializer element is not constant #define VECTORS_BASE vectors_base arch/arm/mm/dump.c:32:4: note: in expansion of macro 'VECTORS_BASE' { VECTORS_BASE, "Vectors" }, arch/arm/mm/dump.c:71:11: error: 'L_PTE_USER' undeclared here (not in a function); did you mean 'VTIME_USER'? .mask = L_PTE_USER, ^~~~~~~~~~ Obviously the feature only makes sense with an MMU, so let's add the dependency here. Fixes: a8e53c151fe7 ("ARM: 8737/1: mm: dump: add checking for writable and executable") Acked-by: Laura Abbott Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 78a647080ebc..199ebc1c4538 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -22,6 +22,7 @@ config ARM_PTDUMP_DEBUGFS config DEBUG_WX bool "Warn on W+X mappings at boot" + depends on MMU select ARM_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. -- cgit v1.2.3 From 73b9160d0dfe44dfdaffd6465dc1224c38a4a73c Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Tue, 6 Mar 2018 01:37:21 +0100 Subject: ARM: 8748/1: mm: Define vdso_start, vdso_end as array Define vdso_start, vdso_end as array to avoid compile-time analysis error for the case of built with CONFIG_FORTIFY_SOURCE. and, since vdso_start, vdso_end are used in vdso.c only, move extern-declaration from vdso.h to vdso.c. If kernel is built with CONFIG_FORTIFY_SOURCE, compile-time error happens at this code. - if (memcmp(&vdso_start, "177ELF", 4)) The size of "&vdso_start" is recognized as 1 byte, but n is 4, So that compile-time error is reported. Acked-by: Kees Cook Signed-off-by: Jinbum Park Signed-off-by: Russell King --- arch/arm/include/asm/vdso.h | 2 -- arch/arm/kernel/vdso.c | 12 +++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h index 9c99e817535e..5b85889f82ee 100644 --- a/arch/arm/include/asm/vdso.h +++ b/arch/arm/include/asm/vdso.h @@ -12,8 +12,6 @@ struct mm_struct; void arm_install_vdso(struct mm_struct *mm, unsigned long addr); -extern char vdso_start, vdso_end; - extern unsigned int vdso_total_pages; #else /* CONFIG_VDSO */ diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index a4d6dc0f2427..f4dd7f9663c1 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -39,6 +39,8 @@ static struct page **vdso_text_pagelist; +extern char vdso_start[], vdso_end[]; + /* Total number of pages needed for the data and text portions of the VDSO. */ unsigned int vdso_total_pages __ro_after_init; @@ -197,13 +199,13 @@ static int __init vdso_init(void) unsigned int text_pages; int i; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("VDSO is not a valid ELF object!\n"); return -ENOEXEC; } - text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start); /* Allocate the VDSO text pagelist */ vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), @@ -218,7 +220,7 @@ static int __init vdso_init(void) for (i = 0; i < text_pages; i++) { struct page *page; - page = virt_to_page(&vdso_start + i * PAGE_SIZE); + page = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_text_pagelist[i] = page; } @@ -229,7 +231,7 @@ static int __init vdso_init(void) cntvct_ok = cntvct_functional(); - patch_vdso(&vdso_start); + patch_vdso(vdso_start); return 0; } -- cgit v1.2.3 From 1b8837b61714c3743fadb3a6bbb57ffd53839e7c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 12 Mar 2018 15:51:26 +0100 Subject: ARM: 8750/1: deflate_xip_data.sh: minor fixes Send nm complaints about broken pipe (when sed exits early) to /dev/null. All errors should be printed to stderr. Don't trap on normal exit so the trap can return an error code. Signed-off-by: Nicolas Pitre Tested-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/boot/deflate_xip_data.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh index 1189598a25eb..5e7d758ebdd6 100755 --- a/arch/arm/boot/deflate_xip_data.sh +++ b/arch/arm/boot/deflate_xip_data.sh @@ -30,7 +30,7 @@ esac sym_val() { # extract hex value for symbol in $1 - local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}") + local val=$($NM "$VMLINUX" 2>/dev/null | sed -n "/ $1\$/{s/ .*$//p;q}") [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; } # convert from hex to decimal echo $((0x$val)) @@ -48,12 +48,12 @@ data_end=$(($_edata_loc - $base_offset)) file_end=$(stat -c "%s" "$XIPIMAGE") if [ "$file_end" != "$data_end" ]; then printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \ - $(($file_end + $base_offset)) $_edata_loc 2>&1 + $(($file_end + $base_offset)) $_edata_loc 1>&2 exit 1; fi # be ready to clean up -trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3 +trap 'rm -f "$XIPIMAGE.tmp"; exit 1' 1 2 3 # substitute the data section by a compressed version $DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp" -- cgit v1.2.3 From cfb2f6f6e0ba11ea7b263d6b69c170c4b32ac0ea Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 24 Mar 2018 11:28:14 -0500 Subject: Revert "mqueue: switch to on-demand creation of internal mount" This reverts commit 36735a6a2b5e042db1af956ce4bcc13f3ff99e21. Aleksa Sarai writes: > [REGRESSION v4.16-rc6] [PATCH] mqueue: forbid unprivileged user access to internal mount > > Felix reported weird behaviour on 4.16.0-rc6 with regards to mqueue[1], > which was introduced by 36735a6a2b5e ("mqueue: switch to on-demand > creation of internal mount"). > > Basically, the reproducer boils down to being able to mount mqueue if > you create a new user namespace, even if you don't unshare the IPC > namespace. > > Previously this was not possible, and you would get an -EPERM. The mount > is the *host* mqueue mount, which is being cached and just returned from > mqueue_mount(). To be honest, I'm not sure if this is safe or not (or if > it was intentional -- since I'm not familiar with mqueue). > > To me it looks like there is a missing permission check. I've included a > patch below that I've compile-tested, and should block the above case. > Can someone please tell me if I'm missing something? Is this actually > safe? > > [1]: https://github.com/docker/docker/issues/36674 The issue is a lot deeper than a missing permission check. sb->s_user_ns was is improperly set as well. So in addition to the filesystem being mounted when it should not be mounted, so things are not allow that should be. We are practically to the release of 4.16 and there is no agreement between Al Viro and myself on what the code should looks like to fix things properly. So revert the code to what it was before so that we can take our time and discuss this properly. Fixes: 36735a6a2b5e ("mqueue: switch to on-demand creation of internal mount") Reported-by: Felix Abecassis Reported-by: Aleksa Sarai Signed-off-by: "Eric W. Biederman" --- ipc/mqueue.c | 74 ++++++++++++++++-------------------------------------------- 1 file changed, 19 insertions(+), 55 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index d7f309f74dec..a808f29d4c5a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -325,9 +325,8 @@ err: static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct ipc_namespace *ns = data; + struct ipc_namespace *ns = sb->s_fs_info; - sb->s_fs_info = ns; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -344,44 +343,18 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct file_system_type mqueue_fs_type; -/* - * Return value is pinned only by reference in ->mq_mnt; it will - * live until ipcns dies. Caller does not need to drop it. - */ -static struct vfsmount *mq_internal_mount(void) -{ - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - struct vfsmount *m = ns->mq_mnt; - if (m) - return m; - m = kern_mount_data(&mqueue_fs_type, ns); - spin_lock(&mq_lock); - if (unlikely(ns->mq_mnt)) { - spin_unlock(&mq_lock); - if (!IS_ERR(m)) - kern_unmount(m); - return ns->mq_mnt; - } - if (!IS_ERR(m)) - ns->mq_mnt = m; - spin_unlock(&mq_lock); - return m; -} - static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct vfsmount *m; - if (flags & SB_KERNMOUNT) - return mount_nodev(fs_type, flags, data, mqueue_fill_super); - m = mq_internal_mount(); - if (IS_ERR(m)) - return ERR_CAST(m); - atomic_inc(&m->mnt_sb->s_active); - down_write(&m->mnt_sb->s_umount); - return dget(m->mnt_root); + struct ipc_namespace *ns; + if (flags & SB_KERNMOUNT) { + ns = data; + data = NULL; + } else { + ns = current->nsproxy->ipc_ns; + } + return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); } static void init_once(void *foo) @@ -771,16 +744,13 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro, static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { - struct vfsmount *mnt = mq_internal_mount(); - struct dentry *root; + struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt; + struct dentry *root = mnt->mnt_root; struct filename *name; struct path path; int fd, error; int ro; - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - audit_mq_open(oflag, mode, attr); if (IS_ERR(name = getname(u_name))) @@ -791,7 +761,6 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ - root = mnt->mnt_root; inode_lock(d_inode(root)); path.dentry = lookup_one_len(name->name, root, strlen(name->name)); if (IS_ERR(path.dentry)) { @@ -840,9 +809,6 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; - if (!mnt) - return -ENOENT; - name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); @@ -1569,26 +1535,28 @@ int mq_init_ns(struct ipc_namespace *ns) ns->mq_msgsize_max = DFLT_MSGSIZEMAX; ns->mq_msg_default = DFLT_MSG; ns->mq_msgsize_default = DFLT_MSGSIZE; - ns->mq_mnt = NULL; + ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); + if (IS_ERR(ns->mq_mnt)) { + int err = PTR_ERR(ns->mq_mnt); + ns->mq_mnt = NULL; + return err; + } return 0; } void mq_clear_sbinfo(struct ipc_namespace *ns) { - if (ns->mq_mnt) - ns->mq_mnt->mnt_sb->s_fs_info = NULL; + ns->mq_mnt->mnt_sb->s_fs_info = NULL; } void mq_put_mnt(struct ipc_namespace *ns) { - if (ns->mq_mnt) - kern_unmount(ns->mq_mnt); + kern_unmount(ns->mq_mnt); } static int __init init_mqueue_fs(void) { - struct vfsmount *m; int error; mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", @@ -1610,10 +1578,6 @@ static int __init init_mqueue_fs(void) if (error) goto out_filesystem; - m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns); - if (IS_ERR(m)) - goto out_filesystem; - init_ipc_ns.mq_mnt = m; return 0; out_filesystem: -- cgit v1.2.3 From f1869a890cdedb92a3fab969db5d0fd982850273 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Mar 2018 10:43:26 +0100 Subject: tty: vt: fix up tabstops properly Tabs on a console with long lines do not wrap properly, so correctly account for the line length when computing the tab placement location. Reported-by: James Holderness Signed-off-by: Greg Kroah-Hartman Cc: stable Signed-off-by: Linus Torvalds --- drivers/tty/vt/vt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 88b902c525d7..b4e57c5a8bba 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1727,7 +1727,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) default_attr(vc); update_attr(vc); - vc->vc_tab_stop[0] = 0x01010100; + vc->vc_tab_stop[0] = vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = @@ -1771,7 +1771,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_pos -= (vc->vc_x << 1); while (vc->vc_x < vc->vc_cols - 1) { vc->vc_x++; - if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31))) + if (vc->vc_tab_stop[7 & (vc->vc_x >> 5)] & (1 << (vc->vc_x & 31))) break; } vc->vc_pos += (vc->vc_x << 1); @@ -1831,7 +1831,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) lf(vc); return; case 'H': - vc->vc_tab_stop[vc->vc_x >> 5] |= (1 << (vc->vc_x & 31)); + vc->vc_tab_stop[7 & (vc->vc_x >> 5)] |= (1 << (vc->vc_x & 31)); return; case 'Z': respond_ID(tty); @@ -2024,7 +2024,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) return; case 'g': if (!vc->vc_par[0]) - vc->vc_tab_stop[vc->vc_x >> 5] &= ~(1 << (vc->vc_x & 31)); + vc->vc_tab_stop[7 & (vc->vc_x >> 5)] &= ~(1 << (vc->vc_x & 31)); else if (vc->vc_par[0] == 3) { vc->vc_tab_stop[0] = vc->vc_tab_stop[1] = -- cgit v1.2.3 From 52396500f97c53860164debc7d4f759077853423 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 23 Mar 2018 15:53:38 +1000 Subject: powerpc/64s: Fix i-side SLB miss bad address handler saving nonvolatile GPRs The SLB bad address handler's trap number fixup does not preserve the low bit that indicates nonvolatile GPRs have not been saved. This leads save_nvgprs to skip saving them, and subsequent functions and return from interrupt will think they are saved. This causes kernel branch-to-garbage debugging to not have correct registers, can also cause userspace to have its registers clobbered after a segfault. Fixes: f0f558b131db ("powerpc/mm: Preserve CFAR value on SLB miss caused by access to bogus address") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3ac87e53b3da..1ecfd8ffb098 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -706,7 +706,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) ld r3, PACA_EXSLB+EX_DAR(r13) std r3, _DAR(r1) beq cr6, 2f - li r10, 0x480 /* fix trap number for I-SLB miss */ + li r10, 0x481 /* fix trap number for I-SLB miss */ std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD -- cgit v1.2.3 From 3eb2ce825ea1ad89d20f7a3b5780df850e4be274 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Mar 2018 12:44:30 -1000 Subject: Linux 4.16-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 486db374d1c1..7ba478ab8c82 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 9137108cc3d64ade13e753108ec611a0daed16a0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Mar 2018 14:04:23 -0600 Subject: RDMA/rdma_cm: Fix use after free race with process_one_req process_one_req() can race with rdma_addr_cancel(): CPU0 CPU1 ==== ==== process_one_work() debug_work_deactivate(work); process_one_req() rdma_addr_cancel() mutex_lock(&lock); set_timeout(&req->work,..); __queue_work() debug_work_activate(work); mutex_unlock(&lock); mutex_lock(&lock); [..] list_del(&req->list); mutex_unlock(&lock); [..] // ODEBUG explodes since the work is still queued. kfree(req); Causing ODEBUG to detect the use after free: ODEBUG: free active (active state 0) object type: work_struct hint: process_one_req+0x0/0x6c0 include/net/dst.h:165 WARNING: CPU: 0 PID: 79 at lib/debugobjects.c:291 debug_print_object+0x166/0x220 lib/debugobjects.c:288 kvm: emulating exchange as write Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 79 Comm: kworker/u4:3 Not tainted 4.16.0-rc6+ #361 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: ib_addr process_one_req Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1dc/0x200 kernel/panic.c:547 report_bug+0x1f4/0x2b0 lib/bug.c:186 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178 fixup_bug arch/x86/kernel/traps.c:247 [inline] do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315 invalid_op+0x1b/0x40 arch/x86/entry/entry_64.S:986 RIP: 0010:debug_print_object+0x166/0x220 lib/debugobjects.c:288 RSP: 0000:ffff8801d966f210 EFLAGS: 00010086 RAX: dffffc0000000008 RBX: 0000000000000003 RCX: ffffffff815acd6e RDX: 0000000000000000 RSI: 1ffff1003b2cddf2 RDI: 0000000000000000 RBP: ffff8801d966f250 R08: 0000000000000000 R09: 1ffff1003b2cddc8 R10: ffffed003b2cde71 R11: ffffffff86f39a98 R12: 0000000000000001 R13: ffffffff86f15540 R14: ffffffff86408700 R15: ffffffff8147c0a0 __debug_check_no_obj_freed lib/debugobjects.c:745 [inline] debug_check_no_obj_freed+0x662/0xf1f lib/debugobjects.c:774 kfree+0xc7/0x260 mm/slab.c:3799 process_one_req+0x2e7/0x6c0 drivers/infiniband/core/addr.c:592 process_one_work+0xc47/0x1bb0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x33c/0x400 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:406 Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC") Reported-by: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9183d148d644..c3e6811bb1bd 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -586,6 +586,15 @@ static void process_one_req(struct work_struct *_work) list_del(&req->list); mutex_unlock(&lock); + /* + * Although the work will normally have been canceled by the + * workqueue, it can still be requeued as long as it is on the + * req_list, so it could have been requeued before we grabbed &lock. + * We need to cancel it after it is removed from req_list to really be + * sure it is safe to free. + */ + cancel_delayed_work(&req->work); + req->callback(req->status, (struct sockaddr *)&req->src_addr, req->addr, req->context); put_client(req->client); -- cgit v1.2.3 From 4b658d1bbc16605330694bb3ef2570c465ef383d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 25 Mar 2018 11:23:55 +0300 Subject: RDMA/ucma: Check that device is connected prior to access it Add missing check that device is connected prior to access it. [ 55.358652] BUG: KASAN: null-ptr-deref in rdma_init_qp_attr+0x4a/0x2c0 [ 55.359389] Read of size 8 at addr 00000000000000b0 by task qp/618 [ 55.360255] [ 55.360432] CPU: 1 PID: 618 Comm: qp Not tainted 4.16.0-rc1-00071-gcaf61b1b8b88 #91 [ 55.361693] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 55.363264] Call Trace: [ 55.363833] dump_stack+0x5c/0x77 [ 55.364215] kasan_report+0x163/0x380 [ 55.364610] ? rdma_init_qp_attr+0x4a/0x2c0 [ 55.365238] rdma_init_qp_attr+0x4a/0x2c0 [ 55.366410] ucma_init_qp_attr+0x111/0x200 [ 55.366846] ? ucma_notify+0xf0/0xf0 [ 55.367405] ? _get_random_bytes+0xea/0x1b0 [ 55.367846] ? urandom_read+0x2f0/0x2f0 [ 55.368436] ? kmem_cache_alloc_trace+0xd2/0x1e0 [ 55.369104] ? refcount_inc_not_zero+0x9/0x60 [ 55.369583] ? refcount_inc+0x5/0x30 [ 55.370155] ? rdma_create_id+0x215/0x240 [ 55.370937] ? _copy_to_user+0x4f/0x60 [ 55.371620] ? mem_cgroup_commit_charge+0x1f5/0x290 [ 55.372127] ? _copy_from_user+0x5e/0x90 [ 55.372720] ucma_write+0x174/0x1f0 [ 55.373090] ? ucma_close_id+0x40/0x40 [ 55.373805] ? __lru_cache_add+0xa8/0xd0 [ 55.374403] __vfs_write+0xc4/0x350 [ 55.374774] ? kernel_read+0xa0/0xa0 [ 55.375173] ? fsnotify+0x899/0x8f0 [ 55.375544] ? fsnotify_unmount_inodes+0x170/0x170 [ 55.376689] ? __fsnotify_update_child_dentry_flags+0x30/0x30 [ 55.377522] ? handle_mm_fault+0x174/0x320 [ 55.378169] vfs_write+0xf7/0x280 [ 55.378864] SyS_write+0xa1/0x120 [ 55.379270] ? SyS_read+0x120/0x120 [ 55.379643] ? mm_fault_error+0x180/0x180 [ 55.380071] ? task_work_run+0x7d/0xd0 [ 55.380910] ? __task_pid_nr_ns+0x120/0x140 [ 55.381366] ? SyS_read+0x120/0x120 [ 55.381739] do_syscall_64+0xeb/0x250 [ 55.382143] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 55.382841] RIP: 0033:0x7fc2ef803e99 [ 55.383227] RSP: 002b:00007fffcc5f3be8 EFLAGS: 00000217 ORIG_RAX: 0000000000000001 [ 55.384173] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc2ef803e99 [ 55.386145] RDX: 0000000000000057 RSI: 0000000020000080 RDI: 0000000000000003 [ 55.388418] RBP: 00007fffcc5f3c00 R08: 0000000000000000 R09: 0000000000000000 [ 55.390542] R10: 0000000000000000 R11: 0000000000000217 R12: 0000000000400480 [ 55.392916] R13: 00007fffcc5f3cf0 R14: 0000000000000000 R15: 0000000000000000 [ 55.521088] Code: e5 4d 1e ff 48 89 df 44 0f b6 b3 b8 01 00 00 e8 65 50 1e ff 4c 8b 2b 49 8d bd b0 00 00 00 e8 56 50 1e ff 41 0f b6 c6 48 c1 e0 04 <49> 03 85 b0 00 00 00 48 8d 78 08 48 89 04 24 e8 3a 4f 1e ff 48 [ 55.525980] RIP: rdma_init_qp_attr+0x52/0x2c0 RSP: ffff8801e2c2f9d8 [ 55.532648] CR2: 00000000000000b0 [ 55.534396] ---[ end trace 70cee64090251c0b ]--- Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Fixes: d541e45500bd ("IB/core: Convert ah_attr from OPA to IB when copying to user") Reported-by: Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 60449d611fb4..0811f58c670f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1166,6 +1166,11 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + if (!ctx->cm_id->device) { + ret = -EINVAL; + goto out; + } + resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; -- cgit v1.2.3 From c8d3bcbfc5eab3f01cf373d039af725f3b488813 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 25 Mar 2018 11:39:05 +0300 Subject: RDMA/ucma: Check that device exists prior to accessing it Ensure that device exists prior to accessing its properties. Reported-by: Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 0811f58c670f..21585055cf32 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1336,7 +1336,7 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, { struct rdma_ucm_notify cmd; struct ucma_context *ctx; - int ret; + int ret = -EINVAL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; @@ -1345,7 +1345,9 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); + if (ctx->cm_id->device) + ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); + ucma_put_ctx(ctx); return ret; } -- cgit v1.2.3 From 38759d6175d338fbf9282c8ea2b51f3b7ab9bc98 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 26 Mar 2018 16:10:18 +0100 Subject: RDMA/hns: ensure for-loop actually iterates and free's buffers The current for-loop zeros variable i and only loops once, hence not all the buffers are free'd. Fix this by setting i correctly. Detected by CoverityScan, CID#1463415 ("Operands don't affect result") Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") Signed-off-by: Colin Ian King Reviewed-by: Yixian Liu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index db2ff352d75f..ec638778661c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4383,7 +4383,7 @@ err_dma_alloc_buf: eq->l0_dma = 0; if (mhop_num == 1) - for (i -= i; i >= 0; i--) + for (i -= 1; i >= 0; i--) dma_free_coherent(dev, buf_chk_sz, eq->buf[i], eq->buf_dma[i]); else if (mhop_num == 2) { -- cgit v1.2.3 From ef1b204a7024de1a813a7bf7d68c45fac9b9d1a3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Mar 2018 12:52:10 +0000 Subject: drm/tegra: dc: Using NULL instead of plain integer Fixes the following sparse warnings: drivers/gpu/drm/tegra/dc.c:2181:69: warning: Using plain integer as NULL pointer Signed-off-by: Wei Yongjun Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index c1a5ae8faaef..90b25ce363ca 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2160,7 +2160,7 @@ static int tegra_dc_couple(struct tegra_dc *dc) struct device_link *link; struct device *partner; - partner = driver_find_device(dc->dev->driver, NULL, 0, + partner = driver_find_device(dc->dev->driver, NULL, NULL, tegra_dc_match_by_pipe); if (!partner) return -EPROBE_DEFER; -- cgit v1.2.3 From 84652aefb347297aa08e91e283adf7b18f77c2d5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 28 Mar 2018 11:27:22 -0700 Subject: RDMA/ucma: Introduce safer rdma_addr_size() variants There are several places in the ucma ABI where userspace can pass in a sockaddr but set the address family to AF_IB. When that happens, rdma_addr_size() will return a size bigger than sizeof struct sockaddr_in6, and the ucma kernel code might end up copying past the end of a buffer not sized for a struct sockaddr_ib. Fix this by introducing new variants int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); that are type-safe for the types used in the ucma ABI and return 0 if the size computed is bigger than the size of the type passed in. We can use these new variants to check what size userspace has passed in before copying any addresses. Reported-by: Signed-off-by: Roland Dreier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 16 ++++++++++++++++ drivers/infiniband/core/ucma.c | 34 +++++++++++++++++----------------- include/rdma/ib_addr.h | 2 ++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index c3e6811bb1bd..cb1d2ab13c66 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -207,6 +207,22 @@ int rdma_addr_size(struct sockaddr *addr) } EXPORT_SYMBOL(rdma_addr_size); +int rdma_addr_size_in6(struct sockaddr_in6 *addr) +{ + int ret = rdma_addr_size((struct sockaddr *) addr); + + return ret <= sizeof(*addr) ? ret : 0; +} +EXPORT_SYMBOL(rdma_addr_size_in6); + +int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr) +{ + int ret = rdma_addr_size((struct sockaddr *) addr); + + return ret <= sizeof(*addr) ? ret : 0; +} +EXPORT_SYMBOL(rdma_addr_size_kss); + static struct rdma_addr_client self; void rdma_addr_register_client(struct rdma_addr_client *client) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 21585055cf32..d933336d7e01 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -632,6 +632,9 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (!rdma_addr_size_in6(&cmd.addr)) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -645,22 +648,21 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind cmd; - struct sockaddr *addr; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - addr = (struct sockaddr *) &cmd.addr; - if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) + if (cmd.reserved || !cmd.addr_size || + cmd.addr_size != rdma_addr_size_kss(&cmd.addr)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_bind_addr(ctx->cm_id, addr); + ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); ucma_put_ctx(ctx); return ret; } @@ -670,23 +672,22 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, int in_len, int out_len) { struct rdma_ucm_resolve_ip cmd; - struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - src = (struct sockaddr *) &cmd.src_addr; - dst = (struct sockaddr *) &cmd.dst_addr; - if (!rdma_addr_size(src) || !rdma_addr_size(dst)) + if (!rdma_addr_size_in6(&cmd.src_addr) || + !rdma_addr_size_in6(&cmd.dst_addr)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } @@ -696,24 +697,23 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; - struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - src = (struct sockaddr *) &cmd.src_addr; - dst = (struct sockaddr *) &cmd.dst_addr; - if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || - !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) + if (cmd.reserved || + (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) || + !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } @@ -1433,7 +1433,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file, join_cmd.response = cmd.response; join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; - join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); + join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr); if (!join_cmd.addr_size) return -EINVAL; @@ -1452,7 +1452,7 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - if (!rdma_addr_size((struct sockaddr *)&cmd.addr)) + if (!rdma_addr_size_kss(&cmd.addr)) return -EINVAL; return ucma_process_join(file, &cmd, out_len); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index d656809f1217..415e09960017 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -130,6 +130,8 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr, const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); +int rdma_addr_size_in6(struct sockaddr_in6 *addr); +int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, -- cgit v1.2.3 From 880cd276dff17ea29e9a8404275c9502b265afa7 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 28 Mar 2018 16:00:57 -0700 Subject: mm, slab: memcg_link the SLAB's kmem_cache All the root caches are linked into slab_root_caches which was introduced by the commit 510ded33e075 ("slab: implement slab_root_caches list") but it missed to add the SLAB's kmem_cache. While experimenting with opt-in/opt-out kmem accounting, I noticed system crashes due to NULL dereference inside cache_from_memcg_idx() while deferencing kmem_cache.memcg_params.memcg_caches. The upstream clean kernel will not see these crashes but SLAB should be consistent with SLUB which does linked its boot caches (kmem_cache_node and kmem_cache) into slab_root_caches. Link: http://lkml.kernel.org/r/20180319210020.60289-1-shakeelb@google.com Fixes: 510ded33e075c ("slab: implement slab_root_caches list") Signed-off-by: Shakeel Butt Cc: Tejun Heo Cc: Vladimir Davydov Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slab.c b/mm/slab.c index 324446621b3e..9095c3945425 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1283,6 +1283,7 @@ void __init kmem_cache_init(void) nr_node_ids * sizeof(struct kmem_cache_node *), SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); + memcg_link_cache(kmem_cache); slab_state = PARTIAL; /* -- cgit v1.2.3 From 3d942ee079b917b24e2a0c5f18d35ac8ec9fee48 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 28 Mar 2018 16:01:01 -0700 Subject: ipc/shm.c: add split function to shm_vm_ops If System V shmget/shmat operations are used to create a hugetlbfs backed mapping, it is possible to munmap part of the mapping and split the underlying vma such that it is not huge page aligned. This will untimately result in the following BUG: kernel BUG at /build/linux-jWa1Fv/linux-4.15.0/mm/hugetlb.c:3310! Oops: Exception in kernel mode, sig: 5 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: kcm nfc af_alg caif_socket caif phonet fcrypt CPU: 18 PID: 43243 Comm: trinity-subchil Tainted: G C E 4.15.0-10-generic #11-Ubuntu NIP: c00000000036e764 LR: c00000000036ee48 CTR: 0000000000000009 REGS: c000003fbcdcf810 TRAP: 0700 Tainted: G C E (4.15.0-10-generic) MSR: 9000000000029033 CR: 24002222 XER: 20040000 CFAR: c00000000036ee44 SOFTE: 1 NIP __unmap_hugepage_range+0xa4/0x760 LR __unmap_hugepage_range_final+0x28/0x50 Call Trace: 0x7115e4e00000 (unreliable) __unmap_hugepage_range_final+0x28/0x50 unmap_single_vma+0x11c/0x190 unmap_vmas+0x94/0x140 exit_mmap+0x9c/0x1d0 mmput+0xa8/0x1d0 do_exit+0x360/0xc80 do_group_exit+0x60/0x100 SyS_exit_group+0x24/0x30 system_call+0x58/0x6c ---[ end trace ee88f958a1c62605 ]--- This bug was introduced by commit 31383c6865a5 ("mm, hugetlbfs: introduce ->split() to vm_operations_struct"). A split function was added to vm_operations_struct to determine if a mapping can be split. This was mostly for device-dax and hugetlbfs mappings which have specific alignment constraints. Mappings initiated via shmget/shmat have their original vm_ops overwritten with shm_vm_ops. shm_vm_ops functions will call back to the original vm_ops if needed. Add such a split function to shm_vm_ops. Link: http://lkml.kernel.org/r/20180321161314.7711-1-mike.kravetz@oracle.com Fixes: 31383c6865a5 ("mm, hugetlbfs: introduce ->split() to vm_operations_struct") Signed-off-by: Mike Kravetz Reported-by: Laurent Dufour Reviewed-by: Laurent Dufour Tested-by: Laurent Dufour Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 4643865e9171..93e0e3a4d009 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -386,6 +386,17 @@ static int shm_fault(struct vm_fault *vmf) return sfd->vm_ops->fault(vmf); } +static int shm_split(struct vm_area_struct *vma, unsigned long addr) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + if (sfd->vm_ops && sfd->vm_ops->split) + return sfd->vm_ops->split(vma, addr); + + return 0; +} + #ifdef CONFIG_NUMA static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { @@ -510,6 +521,7 @@ static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ .fault = shm_fault, + .split = shm_split, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, -- cgit v1.2.3 From 299815a4fba9f3c7a81434dba0072148f1690608 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Wed, 28 Mar 2018 16:01:05 -0700 Subject: mm/page_owner: fix recursion bug after changing skip entries This patch fixes commit 5f48f0bd4e36 ("mm, page_owner: skip unnecessary stack_trace entries"). Because if we skip first two entries then logic of checking count value as 2 for recursion is broken and code will go in one depth recursion. so we need to check only one call of _RET_IP(__set_page_owner) while checking for recursion. Current Backtrace while checking for recursion:- (save_stack) from (__set_page_owner) // (But recursion returns true here) (__set_page_owner) from (get_page_from_freelist) (get_page_from_freelist) from (__alloc_pages_nodemask) (__alloc_pages_nodemask) from (depot_save_stack) (depot_save_stack) from (save_stack) // recursion should return true here (save_stack) from (__set_page_owner) (__set_page_owner) from (get_page_from_freelist) (get_page_from_freelist) from (__alloc_pages_nodemask+) (__alloc_pages_nodemask) from (depot_save_stack) (depot_save_stack) from (save_stack) (save_stack) from (__set_page_owner) (__set_page_owner) from (get_page_from_freelist) Correct Backtrace with fix: (save_stack) from (__set_page_owner) // recursion returned true here (__set_page_owner) from (get_page_from_freelist) (get_page_from_freelist) from (__alloc_pages_nodemask+) (__alloc_pages_nodemask) from (depot_save_stack) (depot_save_stack) from (save_stack) (save_stack) from (__set_page_owner) (__set_page_owner) from (get_page_from_freelist) Link: http://lkml.kernel.org/r/1521607043-34670-1-git-send-email-maninder1.s@samsung.com Fixes: 5f48f0bd4e36 ("mm, page_owner: skip unnecessary stack_trace entries") Signed-off-by: Maninder Singh Signed-off-by: Vaneet Narang Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Oscar Salvador Cc: Greg Kroah-Hartman Cc: Ayush Mittal Cc: Prakash Gupta Cc: Vinayak Menon Cc: Vasyl Gomonovych Cc: Amit Sahrawat Cc: Cc: Vaneet Narang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_owner.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index 9886c6073828..7172e0a80e13 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -123,13 +123,13 @@ void __reset_page_owner(struct page *page, unsigned int order) static inline bool check_recursive_alloc(struct stack_trace *trace, unsigned long ip) { - int i, count; + int i; if (!trace->nr_entries) return false; - for (i = 0, count = 0; i < trace->nr_entries; i++) { - if (trace->entries[i] == ip && ++count == 2) + for (i = 0; i < trace->nr_entries; i++) { + if (trace->entries[i] == ip) return true; } -- cgit v1.2.3 From c7f26ccfb2c31eb1bf810ba13d044fcf583232db Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 28 Mar 2018 16:01:09 -0700 Subject: mm/vmstat.c: fix vmstat_update() preemption BUG Attempting to hotplug CPUs with CONFIG_VM_EVENT_COUNTERS enabled can cause vmstat_update() to report a BUG due to preemption not being disabled around smp_processor_id(). Discovered on Ubiquiti EdgeRouter Pro with Cavium Octeon II processor. BUG: using smp_processor_id() in preemptible [00000000] code: kworker/1:1/269 caller is vmstat_update+0x50/0xa0 CPU: 0 PID: 269 Comm: kworker/1:1 Not tainted 4.16.0-rc4-Cavium-Octeon-00009-gf83bbd5-dirty #1 Workqueue: mm_percpu_wq vmstat_update Call Trace: show_stack+0x94/0x128 dump_stack+0xa4/0xe0 check_preemption_disabled+0x118/0x120 vmstat_update+0x50/0xa0 process_one_work+0x144/0x348 worker_thread+0x150/0x4b8 kthread+0x110/0x140 ret_from_kernel_thread+0x14/0x1c Link: http://lkml.kernel.org/r/1520881552-25659-1-git-send-email-steven.hill@cavium.com Signed-off-by: Steven J. Hill Reviewed-by: Andrew Morton Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 40b2db6db6b1..33581be705f0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1839,9 +1839,11 @@ static void vmstat_update(struct work_struct *w) * to occur in the future. Keep on running the * update worker thread. */ + preempt_disable(); queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); + preempt_enable(); } } -- cgit v1.2.3 From b213b54fbf9d282dc545252313d727f3972be8e0 Mon Sep 17 00:00:00 2001 From: Honglei Wang Date: Wed, 28 Mar 2018 16:01:12 -0700 Subject: mm/memcontrol.c: fix parameter description mismatch There are a couple of places where parameter description and function name do not match the actual code. Fix it. Link: http://lkml.kernel.org/r/1520843448-17347-1-git-send-email-honglei.wang@oracle.com Signed-off-by: Honglei Wang Acked-by: Tejun Heo Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 670e99b68aa6..9ec024b862ac 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -714,9 +714,9 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) * invocations for reference counting, or use mem_cgroup_iter_break() * to cancel a hierarchy walk before the round-trip is complete. * - * Reclaimers can specify a zone and a priority level in @reclaim to + * Reclaimers can specify a node and a priority level in @reclaim to * divide up the memcgs in the hierarchy among all concurrent - * reclaimers operating on the same zone and priority. + * reclaimers operating on the same node and priority. */ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -2299,7 +2299,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep) } /** - * memcg_kmem_charge: charge a kmem page + * memcg_kmem_charge_memcg: charge a kmem page * @page: page to charge * @gfp: reclaim mode * @order: allocation order -- cgit v1.2.3 From 914b6dfff790544d9b77dfd1723adb3745ec9700 Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Wed, 28 Mar 2018 16:01:16 -0700 Subject: mm/kmemleak.c: wait for scan completion before disabling free A crash is observed when kmemleak_scan accesses the object->pointer, likely due to the following race. TASK A TASK B TASK C kmemleak_write (with "scan" and NOT "scan=on") kmemleak_scan() create_object kmem_cache_alloc fails kmemleak_disable kmemleak_do_cleanup kmemleak_free_enabled = 0 kfree kmemleak_free bails out (kmemleak_free_enabled is 0) slub frees object->pointer update_checksum crash - object->pointer freed (DEBUG_PAGEALLOC) kmemleak_do_cleanup waits for the scan thread to complete, but not for direct call to kmemleak_scan via kmemleak_write. So add a wait for kmemleak_scan completion before disabling kmemleak_free, and while at it fix the comment on stop_scan_thread. [vinmenon@codeaurora.org: fix stop_scan_thread comment] Link: http://lkml.kernel.org/r/1522219972-22809-1-git-send-email-vinmenon@codeaurora.org Link: http://lkml.kernel.org/r/1522063429-18992-1-git-send-email-vinmenon@codeaurora.org Signed-off-by: Vinayak Menon Reviewed-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e83987c55a08..46c2290a08f1 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1657,8 +1657,7 @@ static void start_scan_thread(void) } /* - * Stop the automatic memory scanning thread. This function must be called - * with the scan_mutex held. + * Stop the automatic memory scanning thread. */ static void stop_scan_thread(void) { @@ -1921,12 +1920,15 @@ static void kmemleak_do_cleanup(struct work_struct *work) { stop_scan_thread(); + mutex_lock(&scan_mutex); /* - * Once the scan thread has stopped, it is safe to no longer track - * object freeing. Ordering of the scan thread stopping and the memory - * accesses below is guaranteed by the kthread_stop() function. + * Once it is made sure that kmemleak_scan has stopped, it is safe to no + * longer track object freeing. Ordering of the scan thread stopping and + * the memory accesses below is guaranteed by the kthread_stop() + * function. */ kmemleak_free_enabled = 0; + mutex_unlock(&scan_mutex); if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); -- cgit v1.2.3 From 5b634e8e387372091cad237b6317b26add734555 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 28 Mar 2018 16:01:19 -0700 Subject: MAINTAINERS: correct rmk's email address Correct my email address in the MAINTAINTERS file. Link: http://lkml.kernel.org/r/E1ezkgi-0002fH-01@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 73c0cdabf755..08cc4b867bc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8434,7 +8434,7 @@ S: Orphan F: drivers/net/wireless/marvell/libertas/ MARVELL MACCHIATOBIN SUPPORT -M: Russell King +M: Russell King L: linux-arm-kernel@lists.infradead.org S: Maintained F: arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts @@ -8447,7 +8447,7 @@ F: drivers/net/ethernet/marvell/mv643xx_eth.* F: include/linux/mv643xx.h MARVELL MV88X3310 PHY DRIVER -M: Russell King +M: Russell King L: netdev@vger.kernel.org S: Maintained F: drivers/net/phy/marvell10g.c -- cgit v1.2.3 From 18bd49043caa8b272649d4868c29133eb0a3d143 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 28 Mar 2018 16:01:22 -0700 Subject: MAINTAINERS: demote ARM port to "odd fixes" As of the start of 2018, I am no longer paid to support the core 32-bit ARM architecture code. This means that this code is no longer commercially supported, and is now only supported through voluntary effort. I will continue to merge patches as and when able, but this will be at a lower priority than before (which means a longer latency.) I have also be scaled back the amount of time spent reading email, so email that is intended for my attention needs to make itself plainly obvious, or I will miss it. In an attempt to reduce the amount of email Cc'd to me, exclude arch/arm/boot/dts from the maintainers patterns, but add entries for the SolidRun platforms I look after. Link: http://lkml.kernel.org/r/E1ezkgn-0002fO-52@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 08cc4b867bc8..c2df576114c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1060,41 +1060,42 @@ ARM PORT M: Russell King L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.armlinux.org.uk/ -S: Maintained +S: Odd Fixes T: git git://git.armlinux.org.uk/~rmk/linux-arm.git F: arch/arm/ +X: arch/arm/boot/dts/ ARM PRIMECELL AACI PL041 DRIVER M: Russell King -S: Maintained +S: Odd Fixes F: sound/arm/aaci.* ARM PRIMECELL BUS SUPPORT M: Russell King -S: Maintained +S: Odd Fixes F: drivers/amba/ F: include/linux/amba/bus.h ARM PRIMECELL CLCD PL110 DRIVER M: Russell King -S: Maintained +S: Odd Fixes F: drivers/video/fbdev/amba-clcd.* ARM PRIMECELL KMI PL050 DRIVER M: Russell King -S: Maintained +S: Odd Fixes F: drivers/input/serio/ambakmi.* F: include/linux/amba/kmi.h ARM PRIMECELL MMCI PL180/1 DRIVER M: Russell King -S: Maintained +S: Odd Fixes F: drivers/mmc/host/mmci.* F: include/linux/amba/mmci.h ARM PRIMECELL UART PL010 AND PL011 DRIVERS M: Russell King -S: Maintained +S: Odd Fixes F: drivers/tty/serial/amba-pl01*.c F: include/linux/amba/serial.h @@ -12875,6 +12876,19 @@ S: Maintained F: drivers/net/ethernet/socionext/netsec.c F: Documentation/devicetree/bindings/net/socionext-netsec.txt +SOLIDRUN CLEARFOG SUPPORT +M: Russell King +S: Maintained +F: arch/arm/boot/dts/armada-388-clearfog* +F: arch/arm/boot/dts/armada-38x-solidrun-* + +SOLIDRUN CUBOX-I/HUMMINGBOARD SUPPORT +M: Russell King +S: Maintained +F: arch/arm/boot/dts/imx6*-cubox-i* +F: arch/arm/boot/dts/imx6*-hummingboard* +F: arch/arm/boot/dts/imx6*-sr-* + SONIC NETWORK DRIVER M: Thomas Bogendoerfer L: netdev@vger.kernel.org -- cgit v1.2.3 From 2b1b1b4ac716fd929a2d221bd4ade62263bed915 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 27 Mar 2018 19:01:58 +0200 Subject: mtd: nand: atmel: Fix get_sectorsize() function get_sectorsize() was not using the appropriate macro to extract the ECC sector size from the config cache, which led to buggy ECC when using 1024 byte sectors. Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Cc: Reported-by: Olivier Schonken Signed-off-by: Boris Brezillon Reviewed-by: Richard Weinberger Acked-by: Nicolas Ferre Tested-by: Olivier Schonken --- drivers/mtd/nand/atmel/pmecc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c index fcbe4fd6e684..ca0a70389ba9 100644 --- a/drivers/mtd/nand/atmel/pmecc.c +++ b/drivers/mtd/nand/atmel/pmecc.c @@ -426,7 +426,7 @@ static int get_strength(struct atmel_pmecc_user *user) static int get_sectorsize(struct atmel_pmecc_user *user) { - return user->cache.cfg & PMECC_LOOKUP_TABLE_SIZE_1024 ? 1024 : 512; + return user->cache.cfg & PMECC_CFG_SECTOR1024 ? 1024 : 512; } static void atmel_pmecc_gen_syndrome(struct atmel_pmecc_user *user, int sector) -- cgit v1.2.3 From 87a73eb5b56fd6e07c8e499fe8608ef2d8912b82 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 3 Mar 2018 23:29:03 +0100 Subject: mtd: jedec_probe: Fix crash in jedec_read_mfr() It turns out that the loop where we read manufacturer jedec_read_mfd() can under some circumstances get a CFI_MFR_CONTINUATION repeatedly, making the loop go over all banks and eventually hit the end of the map and crash because of an access violation: Unable to handle kernel paging request at virtual address c4980000 pgd = (ptrval) [c4980000] *pgd=03808811, *pte=00000000, *ppte=00000000 Internal error: Oops: 7 [#1] PREEMPT ARM CPU: 0 PID: 1 Comm: swapper Not tainted 4.16.0-rc1+ #150 Hardware name: Gemini (Device Tree) PC is at jedec_probe_chip+0x6ec/0xcd0 LR is at 0x4 pc : [] lr : [<00000004>] psr: 60000013 sp : c382dd18 ip : 0000ffff fp : 00000000 r10: c0626388 r9 : 00020000 r8 : c0626340 r7 : 00000000 r6 : 00000001 r5 : c3a71afc r4 : c382dd70 r3 : 00000001 r2 : c4900000 r1 : 00000002 r0 : 00080000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 0000397f Table: 00004000 DAC: 00000053 Process swapper (pid: 1, stack limit = 0x(ptrval)) Fix this by breaking the loop with a return 0 if the offset exceeds the map size. Fixes: 5c9c11e1c47c ("[MTD] [NOR] Add support for flash chips with ID in bank other than 0") Cc: Signed-off-by: Linus Walleij Signed-off-by: Boris Brezillon --- drivers/mtd/chips/jedec_probe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index 7c0b27d132b1..b479bd81120b 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1889,6 +1889,8 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base, do { uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), map, cfi); mask = (1 << (cfi->device_type * 8)) - 1; + if (ofs >= map->size) + return 0; result = map_read(map, base + ofs); bank++; } while ((result.x[0] & mask) == CFI_MFR_CONTINUATION); -- cgit v1.2.3 From e457edf0b21c873be827b7c2f6b8e1545485c415 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 29 Mar 2018 11:50:10 -0400 Subject: dm mpath: fix support for loading scsi_dh modules during table load The ability to have multipath dynamically attach a scsi_dh, that the user specified in the multipath table, was broken by commit e8f74a0f00 ("dm mpath: eliminate need to use scsi_device_from_queue"). Restore the ability to load, and attach, a particular scsi_dh module if one is specified (as noticed by checking m->hw_handler_name). Fixes: e8f74a0f00 ("dm mpath: eliminate need to use scsi_device_from_queue") Reported-by: Paul Mackerras Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a05a560d3cba..a6b7baf31cdd 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -887,7 +887,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps q = bdev_get_queue(p->path.dev->bdev); attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); - if (attached_handler_name) { + if (attached_handler_name || m->hw_handler_name) { INIT_DELAYED_WORK(&p->activate_path, activate_path_work); r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error); if (r) { -- cgit v1.2.3 From da5dadb4f11660ca67580cd4a7420161266d6254 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 29 Mar 2018 23:31:32 -0400 Subject: dm: fix dropped return code from dm_get_bdev_for_ioctl dm_get_bdev_for_ioctl()'s return of 0 or 1 must be the result from prepare_ioctl (1 means the ioctl was issued to a partition, 0 means it wasn't). Unfortunately commit 519049afea ("dm: use blkdev_get rather than bdgrab when issuing pass-through ioctl") reused the variable 'r' to store the return from blkdev_get() that follows prepare_ioctl() -- whereby dropping prepare_ioctl()'s result on the floor. This can lead to an ioctl or persistent reservation being issued to a partition going unnoticed, which implies the extra permission check for CAP_SYS_RAWIO is skipped. Fix this by using a different variable to store blkdev_get()'s return. Fixes: 519049afea ("dm: use blkdev_get rather than bdgrab when issuing pass-through ioctl") Reported-by: Alasdair G Kergon Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 45328d8b2859..353ea0ede091 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -466,7 +466,7 @@ static int dm_get_bdev_for_ioctl(struct mapped_device *md, { struct dm_target *tgt; struct dm_table *map; - int srcu_idx, r; + int srcu_idx, r, r2; retry: r = -ENOTTY; @@ -492,9 +492,11 @@ retry: goto out; bdgrab(*bdev); - r = blkdev_get(*bdev, *mode, _dm_claim_ptr); - if (r < 0) + r2 = blkdev_get(*bdev, *mode, _dm_claim_ptr); + if (r2 < 0) { + r = r2; goto out; + } dm_put_live_table(md, srcu_idx); return r; -- cgit v1.2.3