From 8b0a9d42301e45d501d751074a6f767fded680b1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Jun 2015 11:16:25 +0200 Subject: virtio_net: document VIRTIO_NET_CTRL_GUEST_OFFLOADS Document VIRTIO_NET_CTRL_GUEST_OFFLOADS and the relevant feature bits. Will allow ethtool control of the offloads down the road. Reported-by: Yan Vugenfirer Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 7bbee79ca293..ec32293a00db 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -34,6 +34,7 @@ /* The feature bitmap for virtio net */ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -226,4 +227,19 @@ struct virtio_net_ctrl_mq { #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. + * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled. + */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 9c3a473220dda073603a24e93d3015322bda950a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 29 Jun 2015 17:36:45 +0100 Subject: ASoC: topology: fix typos in topology header Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 12215205ab8d..785c5ca0994b 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -110,7 +110,7 @@ /* * Block Header. - * This header preceeds all object and object arrays below. + * This header precedes all object and object arrays below. */ struct snd_soc_tplg_hdr { __le32 magic; /* magic number */ @@ -222,7 +222,7 @@ struct snd_soc_tplg_stream_config { /* * Manifest. List totals for each payload type. Not used in parsing, but will * be passed to the component driver before any other objects in order for any - * global componnent resource allocations. + * global component resource allocations. * * File block representation for manifest :- * +-----------------------------------+----+ -- cgit v1.2.3 From d768f32aec8c0ebb8499ffca89cfed8f5f1a4432 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 2 Jul 2015 09:21:22 +0200 Subject: virtio: Fix typecast of pointer in vring_init() The virtio_ring.h header is used in userspace programs (ie. QEMU), too. Here we can not assume that sizeof(pointer) is the same as sizeof(long), e.g. when compiling for Windows, so the typecast in vring_init() should be done with (uintptr_t) instead of (unsigned long). Signed-off-by: Thomas Huth Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 915980ac68df..c07295969b7e 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -31,6 +31,9 @@ * SUCH DAMAGE. * * Copyright Rusty Russell IBM Corporation 2007. */ +#ifndef __KERNEL__ +#include +#endif #include #include @@ -143,7 +146,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16) + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } -- cgit v1.2.3 From 3121bb023e2db4f00ed6678898c09e35ed4b5301 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 Jul 2015 10:56:49 +0200 Subject: virtio: define virtio_pci_cfg_cap in header. We already have VIRTIO_PCI_CAP_PCI_CFG, let's define the structure that goes with it. Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 75301468359f..90007a1abcab 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -157,6 +157,12 @@ struct virtio_pci_common_cfg { __le32 queue_used_hi; /* read-write */ }; +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + __u8 pci_cfg_data[4]; /* Data for BAR access. */ +}; + /* Macro versions of offsets for the Old Timers! */ #define VIRTIO_PCI_CAP_VNDR 0 #define VIRTIO_PCI_CAP_NEXT 1 -- cgit v1.2.3 From 5f867db63473f32cce1b868e281ebd42a41f8fad Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 26 Jun 2015 19:43:58 -0500 Subject: mtd: nand: Fix NAND_USE_BOUNCE_BUFFER flag conflict Commit 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") added a flag NAND_USE_BOUNCE_BUFFER using the same bit value as the existing NAND_BUSWIDTH_AUTO. Cc: Kamal Dasu Fixes: 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") Signed-off-by: Scott Wood Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f25e2bdd188c..272f42952f34 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -177,11 +177,6 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 -/* - * This option could be defined by controller drivers to protect against - * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers - */ -#define NAND_USE_BOUNCE_BUFFER 0x00080000 /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode @@ -189,6 +184,11 @@ typedef enum { * before calling nand_scan_tail. */ #define NAND_BUSWIDTH_AUTO 0x00080000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00100000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ -- cgit v1.2.3 From 4c62360d7562a20c996836d163259c87d9378120 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 30 Jun 2015 15:57:51 -0700 Subject: efi: Handle memory error structures produced based on old versions of standard The memory error record structure includes as its first field a bitmask of which subsequent fields are valid. The allows new fields to be added to the structure while keeping compatibility with older software that parses these records. This mechanism was used between versions 2.2 and 2.3 to add four new fields, growing the size of the structure from 73 bytes to 80. But Linux just added all the new fields so this test: if (gdata->error_data_length >= sizeof(*mem_err)) cper_print_mem(newpfx, mem_err); else goto err_section_too_small; now make Linux complain about old format records being too short. Add a definition for the old format of the structure and use that for the minimum size check. Pass the actual size to cper_print_mem() so it can sanity check the validation_bits field to ensure that if a BIOS using the old format sets bits as if it were new, we won't access fields beyond the end of the structure. Signed-off-by: Tony Luck Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/cper.c | 15 ++++++++++++--- include/linux/cper.h | 22 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 4fd9961d552e..d42537425438 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -305,10 +305,17 @@ const char *cper_mem_err_unpack(struct trace_seq *p, return ret; } -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, + int len) { struct cper_mem_err_compact cmem; + /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ + if (len == sizeof(struct cper_sec_mem_err_old) && + (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { + pr_err(FW_WARN "valid bits set for fields beyond structure\n"); + return; + } if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -405,8 +412,10 @@ static void cper_estatus_print_section( } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); printk("%s""section_type: memory error\n", newpfx); - if (gdata->error_data_length >= sizeof(*mem_err)) - cper_print_mem(newpfx, mem_err); + if (gdata->error_data_length >= + sizeof(struct cper_sec_mem_err_old)) + cper_print_mem(newpfx, mem_err, + gdata->error_data_length); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { diff --git a/include/linux/cper.h b/include/linux/cper.h index 76abba4b238e..dcacb1a72e26 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -340,7 +340,27 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; -/* Memory Error Section */ +/* Old Memory Error Section UEFI 2.1, 2.2 */ +struct cper_sec_mem_err_old { + __u64 validation_bits; + __u64 error_status; + __u64 physical_addr; + __u64 physical_addr_mask; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u8 error_type; +}; + +/* Memory Error Section UEFI >= 2.3 */ struct cper_sec_mem_err { __u64 validation_bits; __u64 error_status; -- cgit v1.2.3 From 71d126fd28de2d4d9b7b2088dbccd7ca62fad6e0 Mon Sep 17 00:00:00 2001 From: Arne Fitzenreiter Date: Wed, 15 Jul 2015 13:54:36 +0200 Subject: libata: add ATA_HORKAGE_NOTRIM Some devices lose data on TRIM whether queued or not. This patch adds a horkage to disable TRIM. tj: Collapsed unnecessary if() nesting. Signed-off-by: Arne Fitzenreiter Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-scsi.c | 3 ++- drivers/ata/libata-transport.c | 2 ++ include/linux/libata.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3131adcc1f87..641a61a59e89 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2568,7 +2568,8 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; - if (ata_id_has_trim(args->id)) { + if (ata_id_has_trim(args->id) && + !(dev->horkage & ATA_HORKAGE_NOTRIM)) { rbuf[14] |= 0x80; /* LBPME */ if (ata_id_has_zero_after_trim(args->id) && diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index d6c37bcd416d..e2d94972962d 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -569,6 +569,8 @@ show_ata_dev_trim(struct device *dev, if (!ata_id_has_trim(ata_dev->id)) mode = "unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NOTRIM) + mode = "forced_unsupported"; else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) mode = "forced_unqueued"; else if (ata_fpdma_dsm_supported(ata_dev)) diff --git a/include/linux/libata.h b/include/linux/libata.h index 36ce37bcc963..5c8bac6225a6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -431,6 +431,8 @@ enum { ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ + /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From af34d637637eabaf49406eb35c948cd51ba262a6 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Mon, 13 Jul 2015 11:48:23 -0500 Subject: libata: add ATA_HORKAGE_MAX_SEC_1024 to revert back to previous max_sectors limit Since no longer limiting max_sectors to BLK_DEF_MAX_SECTORS (commit 34b48db66e08), data corruption may occur on ST380013AS drive configured on 82801JI (ICH10 Family) SATA controller. This patch will allow the driver to limit max_sectors as before # cat /sys/block/sdb/queue/max_sectors_kb 512 I was able to double the max_sectors_kb value up to 16384 on linux-4.2.0-rc2 before seeing corruption, but seems safer to use previous limit. Without this patch max_sectors_kb will be 32767. tj: Minor comment update. Reported-by: Jeff Moyer Signed-off-by: David Milburn Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.19 and later Fixes: 34b48db66e08 ("block: remove artifical max_hw_sectors cap") --- drivers/ata/libata-core.c | 10 ++++++++++ include/linux/ata.h | 1 + include/linux/libata.h | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ed2b218ea64d..68202a8a3a0b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2478,6 +2478,10 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_1024) + dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_1024, + dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) dev->max_sectors = ATA_MAX_SECTORS_LBA48; @@ -4146,6 +4150,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, { "Slimtype DVD A DS8A9SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, + /* + * Causes silent data corruption with higher max sects. + * http://lkml.kernel.org/g/x49wpy40ysk.fsf@segfault.boston.devel.redhat.com + */ + { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, + /* Devices we expect to fail diagnostics */ /* Devices where NCQ should be avoided */ diff --git a/include/linux/ata.h b/include/linux/ata.h index fed36418dd1c..6c78956aa470 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -45,6 +45,7 @@ enum { ATA_SECT_SIZE = 512, ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, + ATA_MAX_SECTORS_1024 = 1024, ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? */ ATA_MAX_SECTORS_TAPE = 65535, diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c8bac6225a6..c9cfbcdb8d14 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -432,7 +432,7 @@ enum { ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - + ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 0838aa7fcfcd875caa7bcc5dab0c3fd40444553d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Jul 2015 15:11:48 +0200 Subject: netfilter: fix netns dependencies with conntrack templates Quoting Daniel Borkmann: "When adding connection tracking template rules to a netns, f.e. to configure netfilter zones, the kernel will endlessly busy-loop as soon as we try to delete the given netns in case there's at least one template present, which is problematic i.e. if there is such bravery that the priviledged user inside the netns is assumed untrusted. Minimal example: ip netns add foo ip netns exec foo iptables -t raw -A PREROUTING -d 1.2.3.4 -j CT --zone 1 ip netns del foo What happens is that when nf_ct_iterate_cleanup() is being called from nf_conntrack_cleanup_net_list() for a provided netns, we always end up with a net->ct.count > 0 and thus jump back to i_see_dead_people. We don't get a soft-lockup as we still have a schedule() point, but the serving CPU spins on 100% from that point onwards. Since templates are normally allocated with nf_conntrack_alloc(), we also bump net->ct.count. The issue why they are not yet nf_ct_put() is because the per netns .exit() handler from x_tables (which would eventually invoke xt_CT's xt_ct_tg_destroy() that drops reference on info->ct) is called in the dependency chain at a *later* point in time than the per netns .exit() handler for the connection tracker. This is clearly a chicken'n'egg problem: after the connection tracker .exit() handler, we've teared down all the connection tracking infrastructure already, so rightfully, xt_ct_tg_destroy() cannot be invoked at a later point in time during the netns cleanup, as that would lead to a use-after-free. At the same time, we cannot make x_tables depend on the connection tracker module, so that the xt_ct_tg_destroy() would be invoked earlier in the cleanup chain." Daniel confirms this has to do with the order in which modules are loaded or having compiled nf_conntrack as modules while x_tables built-in. So we have no guarantees regarding the order in which netns callbacks are executed. Fix this by allocating the templates through kmalloc() from the respective SYNPROXY and CT targets, so they don't depend on the conntrack kmem cache. Then, release then via nf_ct_tmpl_free() from destroy_conntrack(). This branch is marked as unlikely since conntrack templates are rarely allocated and only from the configuration plane path. Note that templates are not kept in any list to avoid further dependencies with nf_conntrack anymore, thus, the tmpl larval list is removed. Reported-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso Tested-by: Daniel Borkmann --- include/net/netfilter/nf_conntrack.h | 2 +- include/net/netns/conntrack.h | 1 - net/netfilter/nf_conntrack_core.c | 67 +++++++++++++++++++++++------------- net/netfilter/nf_synproxy_core.c | 7 ++-- net/netfilter/xt_CT.c | 8 ++--- 5 files changed, 51 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 095433b8a8b0..37cd3911d5c5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 29d6a94db54d..723b61c82b3f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -68,7 +68,6 @@ struct ct_pcpu { spinlock_t lock; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; }; struct netns_ct { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 13fad8668f83..651039ad1681 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +/* Released via destroy_conntrack() */ +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +{ + struct nf_conn *tmpl; + + tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + if (tmpl == NULL) + return NULL; + + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif + atomic_set(&tmpl->ct_general.use, 0); + + return tmpl; +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kfree(tmpl); + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); + +static void nf_ct_tmpl_free(struct nf_conn *tmpl) +{ + nf_ct_ext_destroy(tmpl); + nf_ct_ext_free(tmpl); + kfree(tmpl); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); + return; + } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -540,28 +584,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); -/* deletion from this larval template list happens via nf_ct_put() */ -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) -{ - struct ct_pcpu *pcpu; - - __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); - nf_conntrack_get(&tmpl->ct_general); - - /* add this conntrack to the (per cpu) tmpl list */ - local_bh_disable(); - tmpl->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); - - spin_lock(&pcpu->lock); - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->tmpl); - spin_unlock_bh(&pcpu->lock); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); - /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 789feeae6c44..71f1e9fdfa18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net) static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - struct nf_conntrack_tuple t; struct nf_conn *ct; int err = -ENOMEM; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); if (IS_ERR(ct)) { err = PTR_ERR(ct); goto err1; @@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) if (!nfct_synproxy_ext_add(ct)) goto err2; - nf_conntrack_tmpl_insert(net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 75747aecdebe..c6630030c912 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -184,7 +184,6 @@ out: static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { - struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; @@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err3; } - - nf_conntrack_tmpl_insert(par->net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; -- cgit v1.2.3 From 648a9bc5308d952f2c80772301b339f73026f013 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jul 2015 12:37:56 +0100 Subject: drm/i915: Use two 32bit reads for select 64bit REG_READ ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the hardware sometimes mysteriously totally flummoxes the 64bit read of a 64bit register when read using a single instruction, split the read into two instructions. Since the read here is of automatically incrementing timestamp counters, we also have to be very careful in order to make sure that it does not increment between the two instructions. However, since userspace tried to workaround this issue and so enshrined this ABI for a broken hardware read and in the process neglected that the read only fails in some environments, we have to introduce a new uABI flag for userspace to request the 2x32 bit accurate read of the timestamp. v2: Fix alignment check and include details of the workaround for userspace. Reported-by: Karol Herbst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317 Testcase: igt/gem_reg_read Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: stable@vger.kernel.org Tested-by: Michał Winiarski Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 26 +++++++++++++++++++------- include/uapi/drm/i915_drm.h | 8 ++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a6d8a3ee7750..260389acfb77 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1274,10 +1274,12 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_reg_read *reg = data; struct register_whitelist const *entry = whitelist; + unsigned size; + u64 offset; int i, ret = 0; for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { - if (entry->offset == reg->offset && + if (entry->offset == (reg->offset & -entry->size) && (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask)) break; } @@ -1285,23 +1287,33 @@ int i915_reg_read_ioctl(struct drm_device *dev, if (i == ARRAY_SIZE(whitelist)) return -EINVAL; + /* We use the low bits to encode extra flags as the register should + * be naturally aligned (and those that are not so aligned merely + * limit the available flags for that register). + */ + offset = entry->offset; + size = entry->size; + size |= reg->offset ^ offset; + intel_runtime_pm_get(dev_priv); - switch (entry->size) { + switch (size) { + case 8 | 1: + reg->val = I915_READ64_2x32(offset, offset+4); + break; case 8: - reg->val = I915_READ64(reg->offset); + reg->val = I915_READ64(offset); break; case 4: - reg->val = I915_READ(reg->offset); + reg->val = I915_READ(offset); break; case 2: - reg->val = I915_READ16(reg->offset); + reg->val = I915_READ16(offset); break; case 1: - reg->val = I915_READ8(reg->offset); + reg->val = I915_READ8(offset); break; default: - MISSING_CASE(entry->size); ret = -EINVAL; goto out; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6e1a2ed116cb..db809b722985 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1070,6 +1070,14 @@ struct drm_i915_reg_read { __u64 offset; __u64 val; /* Return value */ }; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2538 | 1) instead. + * + */ struct drm_i915_reset_stats { __u32 ctx_id; -- cgit v1.2.3 From cd812599796f500b042f5464b6665755eca21137 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 11:12:07 -0400 Subject: NFS: Remove the "NFS_CAP_CHANGE_ATTR" capability Setting the change attribute has been mandatory for all NFS versions, since commit 3a1556e8662c ("NFSv2/v3: Simulate the change attribute"). We should therefore not have anything be conditional on it being set/unset. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4proc.c | 3 --- include/linux/nfs_fs_sb.h | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ecebb406cc1a..4a90c9bb3135 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 426e4f8207ef..0adc7d245b3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,7 +442,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) @@ -1692,7 +1692,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9264994ec9d3..c85ffe67b5f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8591,7 +8591,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8617,7 +8616,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8640,7 +8638,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a2ea1491d3df..20bc8e51b161 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -220,7 +220,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) +/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) -- cgit v1.2.3 From 115c48d7a5351abeadd0c8a3dc87eca3d66a6475 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 12:36:34 -0400 Subject: NFS: nfs_mark_for_revalidate should always set NFS_INO_REVAL_PAGECACHE I'm not aware of any existing bugs around this, but the expectation is that nfs_mark_for_revalidate() should always force a revalidation of the cached metadata. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f91b5ade30c9..874b77228fb9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -292,9 +292,12 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); } -- cgit v1.2.3 From fa92754e9c47cf3e5607c0865f4cf59d090cda37 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 13 Jul 2015 12:46:23 -0400 Subject: drm/amdgpu: add VCE harvesting instance query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + include/uapi/drm/amdgpu_drm.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 5533434c7a8f..31ad444c6386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -459,6 +459,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memcpy(&dev_info.cu_bitmap[0], &cu_info.bitmap[0], sizeof(cu_info.bitmap)); dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; + dev_info.vce_harvest_config = adev->vce.harvest_config; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index b6fce900a833..d708a53b8fb1 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -614,6 +614,8 @@ struct drm_amdgpu_info_device { uint32_t vram_type; /** video memory bit width*/ uint32_t vram_bit_width; + /* vce harvesting instance */ + uint32_t vce_harvest_config; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From a3bd4f989f532694337dd30538b635d5213ab86a Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:09 +0800 Subject: mmc: sdhci-esdhc-imx: clear f_max in boarddata After commit 8d86e4fcccf6 ("mmc: sdhci-esdhc-imx: Call mmc_of_parse()"), it's not used anymore. Signed-off-by: Dong Aisheng Reviewed-by: Johan Derycke Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 7 +------ include/linux/platform_data/mmc-esdhc-imx.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 1b0e61847e73..c6b9f6492e1a 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -581,13 +581,8 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) static unsigned int esdhc_pltfm_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct pltfm_imx_data *imx_data = pltfm_host->priv; - struct esdhc_platform_data *boarddata = &imx_data->boarddata; - if (boarddata->f_max && (boarddata->f_max < pltfm_host->clock)) - return boarddata->f_max; - else - return pltfm_host->clock; + return pltfm_host->clock; } static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host) diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index 75f70f6ac137..e1571efa3f2b 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -43,7 +43,6 @@ struct esdhc_platform_data { enum wp_types wp_type; enum cd_types cd_type; int max_bus_width; - unsigned int f_max; bool support_vsel; unsigned int delay_line; }; -- cgit v1.2.3 From e3eea1404f5ff7a2ceb7b5e7ba412a6fd94f2935 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 24 Jul 2015 10:38:12 -0400 Subject: ftrace: Fix breakage of set_ftrace_pid Commit 4104d326b670 ("ftrace: Remove global function list and call function directly") simplified the ftrace code by removing the global_ops list with a new design. But this cleanup also broke the filtering of PIDs that are added to the set_ftrace_pid file. Add back the proper hooks to have pid filtering working once again. Cc: stable@vger.kernel.org # 3.16+ Reported-by: Matt Fleming Reported-by: Richard Weinberger Tested-by: Matt Fleming Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 52 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1da602982cf9..6cd8c0ee4b6f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -116,6 +116,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * SAVE_REGS. If another ops with this flag set is already registered * for any of the functions that this ops will be registered for, then * this ops will fail to register or set_filter_ip. + * PID - Is affected by set_ftrace_pid (allows filtering on those pids) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -132,6 +133,7 @@ enum { FTRACE_OPS_FL_MODIFYING = 1 << 11, FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, + FTRACE_OPS_FL_PID = 1 << 14, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -159,6 +161,7 @@ struct ftrace_ops { struct ftrace_ops *next; unsigned long flags; void *private; + ftrace_func_t saved_func; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea..eb11011b5292 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! */ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, -- cgit v1.2.3 From e54198657b65625085834847ab6271087323ffea Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Jul 2015 23:14:19 -0700 Subject: iscsi-target: Fix iscsit_start_kthreads failure OOPs This patch fixes a regression introduced with the following commit in v4.0-rc1 code, where a iscsit_start_kthreads() failure triggers a NULL pointer dereference OOPs: commit 88dcd2dab5c23b1c9cfc396246d8f476c872f0ca Author: Nicholas Bellinger Date: Thu Feb 26 22:19:15 2015 -0800 iscsi-target: Convert iscsi_thread_set usage to kthread.h To address this bug, move iscsit_start_kthreads() immediately preceeding the transmit of last login response, before signaling a successful transition into full-feature-phase within existing iscsi_target_do_tx_login_io() logic. This ensures that no target-side resource allocation failures can occur after the final login response has been successfully sent. Also, it adds a iscsi_conn->rx_login_comp to allow the RX thread to sleep to prevent other socket related failures until the final iscsi_post_login_handler() call is able to complete. Cc: Sagi Grimberg Cc: # v3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 18 ++++++++++--- drivers/target/iscsi/iscsi_target_login.c | 45 ++++++++++++------------------- drivers/target/iscsi/iscsi_target_login.h | 3 ++- drivers/target/iscsi/iscsi_target_nego.c | 34 ++++++++++++++++++++++- include/target/iscsi/iscsi_target_core.h | 1 + 5 files changed, 68 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index afab32376126..202a42858f25 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3998,7 +3998,13 @@ get_immediate: } transport_err: - iscsit_take_action_for_connection_exit(conn); + /* + * Avoid the normal connection failure code-path if this connection + * is still within LOGIN mode, and iscsi_np process context is + * responsible for cleaning up the early connection failure. + */ + if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) + iscsit_take_action_for_connection_exit(conn); out: return 0; } @@ -4082,7 +4088,7 @@ reject: int iscsi_target_rx_thread(void *arg) { - int ret; + int ret, rc; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; struct iscsi_conn *conn = arg; @@ -4092,10 +4098,16 @@ int iscsi_target_rx_thread(void *arg) * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); + /* + * Wait for iscsi_post_login_handler() to complete before allowing + * incoming iscsi/tcp socket I/O, and/or failing the connection. + */ + rc = wait_for_completion_interruptible(&conn->rx_login_comp); + if (rc < 0) + return 0; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { struct completion comp; - int rc; init_completion(&comp); rc = wait_for_completion_interruptible(&comp); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 3d0fe4ff5590..7e8f65e5448f 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -82,6 +82,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) init_completion(&conn->conn_logout_comp); init_completion(&conn->rx_half_close_comp); init_completion(&conn->tx_half_close_comp); + init_completion(&conn->rx_login_comp); spin_lock_init(&conn->cmd_lock); spin_lock_init(&conn->conn_usage_lock); spin_lock_init(&conn->immed_queue_lock); @@ -644,7 +645,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsit_start_kthreads(struct iscsi_conn *conn) +int iscsit_start_kthreads(struct iscsi_conn *conn) { int ret = 0; @@ -679,6 +680,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return 0; out_tx: + send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); conn->tx_thread_active = false; out_bitmap: @@ -689,7 +691,7 @@ out_bitmap: return ret; } -int iscsi_post_login_handler( +void iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -699,7 +701,6 @@ int iscsi_post_login_handler( struct se_session *se_sess = sess->se_sess; struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; - int rc; iscsit_inc_conn_usage_count(conn); @@ -739,10 +740,6 @@ int iscsi_post_login_handler( sess->sess_ops->InitiatorName); spin_unlock_bh(&sess->conn_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -751,15 +748,20 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); + if (stop_timer) { spin_lock_bh(&se_tpg->session_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); } iscsit_dec_session_usage_count(sess); - return 0; + return; } iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); @@ -800,10 +802,6 @@ int iscsi_post_login_handler( " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); spin_unlock_bh(&se_tpg->session_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -812,10 +810,12 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); - - return 0; } static void iscsi_handle_login_thread_timeout(unsigned long data) @@ -1380,23 +1380,12 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (ret < 0) goto new_sess_out; - if (!conn->sess) { - pr_err("struct iscsi_conn session pointer is NULL!\n"); - goto new_sess_out; - } - iscsi_stop_login_thread_timer(np); - if (signal_pending(current)) - goto new_sess_out; - if (ret == 1) { tpg_np = conn->tpg_np; - ret = iscsi_post_login_handler(np, conn, zero_tsih); - if (ret < 0) - goto new_sess_out; - + iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 1c7358081533..57aa0d0fd820 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,7 +12,8 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); -extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern int iscsit_start_kthreads(struct iscsi_conn *); +extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 8c02fa34716f..f9cde9141836 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -17,6 +17,7 @@ ******************************************************************************/ #include +#include #include #include #include @@ -361,10 +362,24 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log ntohl(login_rsp->statsn), login->rsp_length); padding = ((-login->rsp_length) & 3); + /* + * Before sending the last login response containing the transition + * bit for full-feature-phase, go ahead and start up TX/RX threads + * now to avoid potential resource allocation failures after the + * final login response has been sent. + */ + if (login->login_complete) { + int rc = iscsit_start_kthreads(conn); + if (rc) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + } if (conn->conn_transport->iscsit_put_login_tx(conn, login, login->rsp_length + padding) < 0) - return -1; + goto err; login->rsp_length = 0; mutex_lock(&sess->cmdsn_mutex); @@ -373,6 +388,23 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log mutex_unlock(&sess->cmdsn_mutex); return 0; + +err: + if (login->login_complete) { + if (conn->rx_thread && conn->rx_thread_active) { + send_sig(SIGINT, conn->rx_thread, 1); + kthread_stop(conn->rx_thread); + } + if (conn->tx_thread && conn->tx_thread_active) { + send_sig(SIGINT, conn->tx_thread, 1); + kthread_stop(conn->tx_thread); + } + spin_lock(&iscsit_global->ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id, + get_order(1)); + spin_unlock(&iscsit_global->ts_bitmap_lock); + } + return -1; } static void iscsi_target_sk_data_ready(struct sock *sk) diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 34117b8b72e4..0aedbb2c10e0 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -595,6 +595,7 @@ struct iscsi_conn { int bitmap_id; int rx_thread_active; struct task_struct *rx_thread; + struct completion rx_login_comp; int tx_thread_active; struct task_struct *tx_thread; /* list_head for session connection list */ -- cgit v1.2.3 From 2392debc2be721a7d5b907cbcbc0ebb858dead01 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 22 Jul 2015 10:43:23 +0300 Subject: ipv4: consider TOS in fib_select_default fib_select_default considers alternative routes only when res->fi is for the first alias in res->fa_head. In the common case this can happen only when the initial lookup matches the first alias with highest TOS value. This prevents the alternative routes to require specific TOS. This patch solves the problem as follows: - routes that require specific TOS should be returned by fib_select_default only when TOS matches, as already done in fib_table_lookup. This rule implies that depending on the TOS we can have many different lists of alternative gateways and we have to keep the last used gateway (fa_default) in first alias for the TOS instead of using single tb_default value. - as the aliases are ordered by many keys (TOS desc, fib_priority asc), we restrict the possible results to routes with matching TOS and lowest metric (fib_priority) and routes that match any TOS, again with lowest metric. For example, packet with TOS 8 can not use gw3 (not lowest metric), gw4 (different TOS) and gw6 (not lowest metric), all other gateways can be used: tos 8 via gw1 metric 2 <--- res->fa_head and res->fi tos 8 via gw2 metric 2 tos 8 via gw3 metric 3 tos 4 via gw4 tos 0 via gw5 tos 0 via gw6 metric 1 Reported-by: Hagen Paul Pfeifer Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 +-- net/ipv4/fib_lookup.h | 1 + net/ipv4/fib_semantics.c | 36 +++++++++++++++++++++++++----------- net/ipv4/fib_trie.c | 3 ++- net/ipv4/route.c | 2 +- 5 files changed, 30 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 49c142bdf01e..5fa643b4e891 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -183,7 +183,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; - int tb_default; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; @@ -290,7 +289,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -void fib_select_default(struct fib_result *res); +void fib_select_default(const struct flowi4 *flp, struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c6211ed60b03..9c02920725db 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -13,6 +13,7 @@ struct fib_alias { u8 fa_state; u8 fa_slen; u32 tb_id; + s16 fa_default; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e1079583b8b7..3a06586b170c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1202,28 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } /* Must be invoked inside of an RCU protected region. */ -void fib_select_default(struct fib_result *res) +void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; - struct fib_alias *fa; + struct fib_alias *fa, *fa1 = NULL; + u32 last_prio = res->fi->fib_priority; + u8 last_tos = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; if (fa->fa_slen != slen) continue; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; if (fa->tb_id != tb->tb_id) continue; + if (next_fi->fib_priority > last_prio && + fa->fa_tos == last_tos) { + if (last_tos) + continue; + break; + } + if (next_fi->fib_flags & RTNH_F_DEAD) + continue; + last_tos = fa->fa_tos; + last_prio = next_fi->fib_priority; + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; @@ -1233,10 +1245,11 @@ void fib_select_default(struct fib_result *res) if (!fi) { if (next_fi != res->fi) break; + fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { + &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } fi = next_fi; @@ -1244,20 +1257,21 @@ void fib_select_default(struct fib_result *res) } if (order <= 0 || !fi) { - tb->tb_default = -1; + if (fa1) + fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { + fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); - tb->tb_default = last_idx; + fa1->fa_default = last_idx; out: return; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 15d32612e3c6..81797e065b21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; err = switchdev_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; /* (Optionally) offload fib entry to switch hardware. */ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, @@ -1990,7 +1992,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) return NULL; tb->tb_id = id; - tb->tb_default = -1; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d0362a2de3d3..e681b852ced1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2176,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!res.prefixlen && res.table->tb_num_default > 1 && res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(&res); + fib_select_default(fl4, &res); if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, res); -- cgit v1.2.3 From d1fe19444d82e399e38c1594c71b850eca8e9de0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Jul 2015 12:05:37 +0200 Subject: inet: frag: don't re-use chainlist for evictor commit 65ba1f1ec0eff ("inet: frags: fix a race between inet_evict_bucket and inet_frag_kill") describes the bug, but the fix doesn't work reliably. Problem is that ->flags member can be set on other cpu without chainlock being held by that task, i.e. the RMW-Cycle can clear INET_FRAG_EVICTED bit after we put the element on the evictor private list. We can crash when walking the 'private' evictor list since an element can be deleted from list underneath the evictor. Join work with Nikolay Alexandrov. Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Reported-by: Johan Schuijt Tested-by: Frank Schreuder Signed-off-by: Nikolay Alexandrov Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ net/ipv4/inet_fragment.c | 8 +++----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e1300b3dd597..56a3a5685f76 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -45,6 +45,7 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to + * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) */ struct inet_frag_queue { spinlock_t lock; @@ -59,6 +60,7 @@ struct inet_frag_queue { __u8 flags; u16 max_size; struct netns_frags *net; + struct hlist_node list_evictor; }; #define INETFRAGS_HASHSZ 1024 diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5e346a082e5f..172234864fec 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -151,14 +151,13 @@ evict_again: } fq->flags |= INET_FRAG_EVICTED; - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); + hlist_add_head(&fq->list_evictor, &expired); ++evicted; } spin_unlock(&hb->chain_lock); - hlist_for_each_entry_safe(fq, n, &expired, list) + hlist_for_each_entry_safe(fq, n, &expired, list_evictor) f->frag_expire((unsigned long) fq); return evicted; @@ -284,8 +283,7 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - if (!(fq->flags & INET_FRAG_EVICTED)) - hlist_del(&fq->list); + hlist_del(&fq->list); spin_unlock(&hb->chain_lock); } -- cgit v1.2.3 From 0e60d245a0be7fdbb723607f1d6621007916b252 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Jul 2015 12:05:38 +0200 Subject: inet: frag: change *_frag_mem_limit functions to take netns_frags as argument Followup patch will call it after inet_frag_queue was freed, so q->net doesn't work anymore (but netf = q->net; free(q); mem_limit(netf) would). Tested-by: Frank Schreuder Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 8 ++++---- net/ieee802154/6lowpan/reassembly.c | 6 +++--- net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/ip_fragment.c | 10 +++++----- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- net/ipv6/reassembly.c | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 56a3a5685f76..e71ca17024f2 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -141,14 +141,14 @@ static inline int frag_mem_limit(struct netns_frags *nf) return percpu_counter_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); } -static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); } static inline void init_frag_mem_limit(struct netns_frags *nf) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1306f2..214d44aef35b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 172234864fec..4473232e4e88 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -328,7 +328,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); + sub_frag_mem_limit(q->net, sum); if (f->destructor) f->destructor(q); @@ -388,7 +388,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 31f71b15cfba..b4a77d021c0d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce77..5c3bbca6a150 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; -- cgit v1.2.3 From caaecdd3d3f8ec0ea9906c54b1dd8ec8316d26b9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 23 Jul 2015 12:05:40 +0200 Subject: inet: frags: remove INET_FRAG_EVICTED and use list_evictor for the test We can simply remove the INET_FRAG_EVICTED flag to avoid all the flags race conditions with the evictor and use a participation test for the evictor list, when we're at that point (after inet_frag_kill) in the timer there're 2 possible cases: 1. The evictor added the entry to its evictor list while the timer was waiting for the chainlock or 2. The timer unchained the entry and the evictor won't see it In both cases we should be able to see list_evictor correctly due to the sync on the chainlock. Joint work with Florian Westphal. Tested-by: Frank Schreuder Signed-off-by: Nikolay Aleksandrov Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +++++-- net/ipv4/inet_fragment.c | 1 - net/ipv4/ip_fragment.c | 2 +- net/ipv6/reassembly.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e71ca17024f2..53eead2da743 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,13 +21,11 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction - * @INET_FRAG_EVICTED: frag queue is being evicted */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), - INET_FRAG_EVICTED = BIT(3) }; /** @@ -127,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f inet_frag_destroy(q, f); } +static inline bool inet_frag_evicting(struct inet_frag_queue *q) +{ + return !hlist_unhashed(&q->list_evictor); +} + /* Memory Tracking Functions. */ /* The default percpu_counter batch size is not big enough to scale to diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a00ca4c00c35..d0a7c0319e3d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -140,7 +140,6 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) if (!del_timer(&fq->timer)) continue; - fq->flags |= INET_FRAG_EVICTED; hlist_add_head(&fq->list_evictor, &expired); ++evicted; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b4a77d021c0d..921138f6c97c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5c3bbca6a150..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); -- cgit v1.2.3 From dfbafc995304ebb9a9b03f65083e6e9cea143b20 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 24 Jul 2015 18:19:25 +0200 Subject: tcp: fix recv with flags MSG_WAITALL | MSG_PEEK Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called with flags = MSG_WAITALL | MSG_PEEK. sk_wait_data waits for sk_receive_queue not empty, but in this case, the receive queue is not empty, but does not contain any skb that we can use. Add a "last skb seen on receive queue" argument to sk_wait_data, so that it sleeps until the receive queue has new skbs. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461 Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258 Reported-by: Enrico Scholz Reported-by: Dan Searle Signed-off-by: Sabrina Dubroca Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/core/sock.c | 5 +++-- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 11 +++++++---- net/llc/af_llc.c | 4 ++-- 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 05a8c1aea251..f21f0708ec59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -902,7 +902,7 @@ void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); -int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; struct timewait_sock_ops; diff --git a/net/core/sock.c b/net/core/sock.c index 08f16db46070..8a14f1285fc4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1967,20 +1967,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 52a94016526d..b5cf13a28009 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -886,7 +886,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f4056785acc..45534a5ab430 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) @@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } if (user_recv) { int chunk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8fd9febaa5ba..8dab4e569571 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", -- cgit v1.2.3 From e018a0cce3d849bc73e72686c571420adc40bad2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jul 2015 21:24:04 +0300 Subject: net/macb: convert to kernel doc This patch coverts struct description to the kernel doc format. There is no functional change. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/platform_data/macb.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 044a124bfbbc..21b15f6fee25 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -8,11 +8,19 @@ #ifndef __MACB_PDATA_H__ #define __MACB_PDATA_H__ +/** + * struct macb_platform_data - platform data for MACB Ethernet + * @phy_mask: phy mask passed when register the MDIO bus + * within the driver + * @phy_irq_pin: PHY IRQ + * @is_rmii: using RMII interface? + * @rev_eth_addr: reverse Ethernet address byte order + */ struct macb_platform_data { u32 phy_mask; - int phy_irq_pin; /* PHY IRQ */ - u8 is_rmii; /* using RMII interface? */ - u8 rev_eth_addr; /* reverse Ethernet address byte order */ + int phy_irq_pin; + u8 is_rmii; + u8 rev_eth_addr; }; #endif /* __MACB_PDATA_H__ */ -- cgit v1.2.3 From 35068ce8cbf1749ef1a4b9b1493af83b8488c37b Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 1 Jul 2015 09:10:43 +0200 Subject: of: constify drv arg of of_driver_match_device stub With this change the stub has the same signature as the actual function, preventing this compiler warning when building without CONFIG_OF: drivers/base/property.c: In function 'fwnode_driver_match_device': >> drivers/base/property.c:608:38: warning: passing argument 2 of 'of_driver_match_device' discards 'const' qualifier from pointer target type return of_driver_match_device(dev, drv); ^ In file included from drivers/base/property.c:18:0: include/linux/of_device.h:61:19: note: expected 'struct device_driver *' but argument is of type 'const struct device_driver *' static inline int of_driver_match_device(struct device *dev, ^ Signed-off-by: Tomeu Vizoso Signed-off-by: Rob Herring --- include/linux/of_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 4c508549833a..cc7dd687a89d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -59,7 +59,7 @@ void of_dma_configure(struct device *dev, struct device_node *np); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, - struct device_driver *drv) + const struct device_driver *drv) { return 0; } -- cgit v1.2.3 From 559ed40752dc63e68f9b9ad301b20e6a3fe5cf21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 26 Jul 2015 02:07:47 +0200 Subject: cpufreq: Avoid attempts to create duplicate symbolic links After commit 87549141d516 (cpufreq: Stop migrating sysfs files on hotplug) there is a problem with CPUs that share cpufreq policy objects with other CPUs and are initially offline. Say CPU1 shares a policy with CPU0 which is online and is registered first. As part of the registration process, cpufreq_add_dev() is called for it. It creates the policy object and a symbolic link to it from the CPU1's sysfs directory. If CPU1 is registered subsequently and it is offline at that time, cpufreq_add_dev() will attempt to create a symbolic link to the policy object for it, but that link is present already, so a warning about that will be triggered. To avoid that warning, make cpufreq use an additional CPU mask containing related CPUs that are actually present for each policy object. That mask is initialized when the policy object is populated after its creation (for the first online CPU using it) and it includes CPUs from the "policy CPUs" mask returned by the cpufreq driver's ->init() callback that are physically present at that time. Symbolic links to the policy are created only for the CPUs in that mask. If cpufreq_add_dev() is invoked for an offline CPU, it checks the new mask and only creates the symlink if the CPU was not in it (the CPU is added to the mask at the same time). In turn, cpufreq_remove_dev() drops the given CPU from the new mask, removes its symlink to the policy object and returns, unless it is the CPU owning the policy object. In that case, the policy object is moved to a new CPU's sysfs directory or deleted if the CPU being removed was the last user of the policy. While at it, notice that cpufreq_remove_dev() can't fail, because its return value is ignored, so make it ignore return values from __cpufreq_remove_dev_prepare() and __cpufreq_remove_dev_finish() and prevent these functions from aborting on errors returned by __cpufreq_governor(). Also drop the now unused sif argument from them. Fixes: 87549141d516 (cpufreq: Stop migrating sysfs files on hotplug) Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Russell King Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 108 +++++++++++++++++++++++----------------------- include/linux/cpufreq.h | 1 + 2 files changed, 56 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26063afb3eba..7a3c30c4336f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1002,7 +1002,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) int ret = 0; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1019,7 +1019,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1163,11 +1163,14 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, "cpufreq"); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - goto err_free_rcpumask; + goto err_free_real_cpus; } INIT_LIST_HEAD(&policy->policy_list); @@ -1184,6 +1187,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1234,6 +1239,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) write_unlock_irqrestore(&cpufreq_driver_lock, flags); cpufreq_policy_put_kobj(policy, notify); + free_cpumask_var(policy->real_cpus); free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1258,14 +1264,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("adding CPU %u\n", cpu); - /* - * Only possible if 'cpu' wasn't physically present earlier and we are - * here from subsys_interface add callback. A hotplug notifier will - * follow and we will handle it like logical CPU hotplug then. For now, - * just create the sysfs link. - */ - if (cpu_is_offline(cpu)) - return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); + if (cpu_is_offline(cpu)) { + /* + * Only possible if we are here from the subsys_interface add + * callback. A hotplug notifier will follow and we will handle + * it as CPU online then. For now, just create the sysfs link, + * unless there is no policy or the link is already present. + */ + policy = per_cpu(cpufreq_cpu_data, cpu); + return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } if (!down_read_trylock(&cpufreq_rwsem)) return 0; @@ -1307,6 +1316,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* related cpus should atleast have policy->cpus */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember which CPUs have been present at the policy creation time. */ + if (!recover_policy) + cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -1420,8 +1433,7 @@ nomem_out: return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_prepare(struct device *dev) { unsigned int cpu = dev->id; int ret = 0; @@ -1437,10 +1449,8 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); - if (ret) { + if (ret) pr_err("%s: Failed to stop governor\n", __func__); - return ret; - } } down_write(&policy->rwsem); @@ -1473,8 +1483,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_finish(struct device *dev) { unsigned int cpu = dev->id; int ret; @@ -1492,10 +1501,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev, /* If cpu is last user of policy, free policy */ if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - if (ret) { + if (ret) pr_err("%s: Failed to exit governor\n", __func__); - return ret; - } } /* @@ -1506,10 +1513,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - /* Free the policy only if the driver is getting removed. */ - if (sif) - cpufreq_policy_free(policy, true); - return 0; } @@ -1521,42 +1524,41 @@ static int __cpufreq_remove_dev_finish(struct device *dev, static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; - int ret; - - /* - * Only possible if 'cpu' is getting physically removed now. A hotplug - * notifier should have already been called and we just need to remove - * link or free policy here. - */ - if (cpu_is_offline(cpu)) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - struct cpumask mask; + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy) - return 0; + if (!policy) + return 0; - cpumask_copy(&mask, policy->related_cpus); - cpumask_clear_cpu(cpu, &mask); + if (cpu_online(cpu)) { + __cpufreq_remove_dev_prepare(dev); + __cpufreq_remove_dev_finish(dev); + } - /* - * Free policy only if all policy->related_cpus are removed - * physically. - */ - if (cpumask_intersects(&mask, cpu_present_mask)) { - remove_cpu_dev_symlink(policy, cpu); - return 0; - } + cpumask_clear_cpu(cpu, policy->real_cpus); + if (cpumask_empty(policy->real_cpus)) { cpufreq_policy_free(policy, true); return 0; } - ret = __cpufreq_remove_dev_prepare(dev, sif); + if (cpu != policy->kobj_cpu) { + remove_cpu_dev_symlink(policy, cpu); + } else { + /* + * The CPU owning the policy object is going away. Move it to + * another suitable CPU. + */ + unsigned int new_cpu = cpumask_first(policy->real_cpus); + struct device *new_dev = get_cpu_device(new_cpu); + + dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); - if (!ret) - ret = __cpufreq_remove_dev_finish(dev, sif); + sysfs_remove_link(&new_dev->kobj, "cpufreq"); + policy->kobj_cpu = new_cpu; + WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); + } - return ret; + return 0; } static void handle_update(struct work_struct *work) @@ -2395,11 +2397,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev, NULL); + __cpufreq_remove_dev_prepare(dev); break; case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev, NULL); + __cpufreq_remove_dev_finish(dev); break; case CPU_DOWN_FAILED: diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 29ad97c34fd5..bde1e567b3a9 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -62,6 +62,7 @@ struct cpufreq_policy { /* CPUs sharing clock, require sw coordination */ cpumask_var_t cpus; /* Online CPUs only */ cpumask_var_t related_cpus; /* Online + Offline CPUs */ + cpumask_var_t real_cpus; /* Related and present */ unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ -- cgit v1.2.3 From e13af53e7d5a8cea8992d9b61fac69bd0ed8d845 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 22 Jul 2015 17:29:00 +0900 Subject: drm/radeon: Drop drm/ prefix for including drm.h in radeon_drm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows radeon_drm.h to be reused verbatim in libdrm. Reviewed-by: Christian König Signed-off-by: Michel Dänzer --- include/uapi/drm/radeon_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 1ef76661e1a1..01aa2a8e3f8d 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -33,7 +33,7 @@ #ifndef __RADEON_DRM_H__ #define __RADEON_DRM_H__ -#include +#include "drm.h" /* WARNING: If you change any of these defines, make sure to change the * defines in the X server file (radeon_sarea.h) -- cgit v1.2.3 From b3fcf36aeef3aeb890d9413c2066048ec7fda7e5 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 22 Jul 2015 17:29:01 +0900 Subject: drm/amdgpu: Drop drm/ prefix for including drm.h in amdgpu_drm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows amdgpu_drm.h to be reused verbatim in libdrm. Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d708a53b8fb1..fbdd11851725 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -32,7 +32,7 @@ #ifndef __AMDGPU_DRM_H__ #define __AMDGPU_DRM_H__ -#include +#include "drm.h" #define DRM_AMDGPU_GEM_CREATE 0x00 #define DRM_AMDGPU_GEM_MMAP 0x01 -- cgit v1.2.3 From 28e6b67f0b292f557468c139085303b15f1a678f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Jul 2015 23:35:25 +0200 Subject: net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 8 +++++++- net/sched/act_api.c | 11 ++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..931738bc5bba 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..43ec92680ae8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) -- cgit v1.2.3