diff options
174 files changed, 2056 insertions, 1507 deletions
diff --git a/Documentation/hwmon/isl68137.rst b/Documentation/hwmon/isl68137.rst index cc4b61447b63..0e71b22047f8 100644 --- a/Documentation/hwmon/isl68137.rst +++ b/Documentation/hwmon/isl68137.rst @@ -16,7 +16,7 @@ Supported chips: * Renesas ISL68220 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl68220' Addresses scanned: - @@ -26,7 +26,7 @@ Supported chips: * Renesas ISL68221 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl68221' Addresses scanned: - @@ -36,7 +36,7 @@ Supported chips: * Renesas ISL68222 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl68222' Addresses scanned: - @@ -46,7 +46,7 @@ Supported chips: * Renesas ISL68223 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl68223' Addresses scanned: - @@ -56,7 +56,7 @@ Supported chips: * Renesas ISL68224 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl68224' Addresses scanned: - @@ -66,7 +66,7 @@ Supported chips: * Renesas ISL68225 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl68225' Addresses scanned: - @@ -76,7 +76,7 @@ Supported chips: * Renesas ISL68226 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl68226' Addresses scanned: - @@ -86,7 +86,7 @@ Supported chips: * Renesas ISL68227 - Prefix: 'raa_dmpvr2_1rail' + Prefix: 'isl68227' Addresses scanned: - @@ -96,7 +96,7 @@ Supported chips: * Renesas ISL68229 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl68229' Addresses scanned: - @@ -106,7 +106,7 @@ Supported chips: * Renesas ISL68233 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl68233' Addresses scanned: - @@ -116,7 +116,7 @@ Supported chips: * Renesas ISL68239 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl68239' Addresses scanned: - @@ -126,7 +126,7 @@ Supported chips: * Renesas ISL69222 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69222' Addresses scanned: - @@ -136,7 +136,7 @@ Supported chips: * Renesas ISL69223 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl69223' Addresses scanned: - @@ -146,7 +146,7 @@ Supported chips: * Renesas ISL69224 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69224' Addresses scanned: - @@ -156,7 +156,7 @@ Supported chips: * Renesas ISL69225 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69225' Addresses scanned: - @@ -166,7 +166,7 @@ Supported chips: * Renesas ISL69227 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl69227' Addresses scanned: - @@ -176,7 +176,7 @@ Supported chips: * Renesas ISL69228 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl69228' Addresses scanned: - @@ -186,7 +186,7 @@ Supported chips: * Renesas ISL69234 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69234' Addresses scanned: - @@ -196,7 +196,7 @@ Supported chips: * Renesas ISL69236 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69236' Addresses scanned: - @@ -206,7 +206,7 @@ Supported chips: * Renesas ISL69239 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl69239' Addresses scanned: - @@ -216,7 +216,7 @@ Supported chips: * Renesas ISL69242 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69242' Addresses scanned: - @@ -226,7 +226,7 @@ Supported chips: * Renesas ISL69243 - Prefix: 'raa_dmpvr2_1rail' + Prefix: 'isl69243' Addresses scanned: - @@ -236,7 +236,7 @@ Supported chips: * Renesas ISL69247 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69247' Addresses scanned: - @@ -246,7 +246,7 @@ Supported chips: * Renesas ISL69248 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69248' Addresses scanned: - @@ -256,7 +256,7 @@ Supported chips: * Renesas ISL69254 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69254' Addresses scanned: - @@ -266,7 +266,7 @@ Supported chips: * Renesas ISL69255 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69255' Addresses scanned: - @@ -276,7 +276,7 @@ Supported chips: * Renesas ISL69256 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69256' Addresses scanned: - @@ -286,7 +286,7 @@ Supported chips: * Renesas ISL69259 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69259' Addresses scanned: - @@ -296,7 +296,7 @@ Supported chips: * Renesas ISL69260 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69260' Addresses scanned: - @@ -306,7 +306,7 @@ Supported chips: * Renesas ISL69268 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69268' Addresses scanned: - @@ -316,7 +316,7 @@ Supported chips: * Renesas ISL69269 - Prefix: 'raa_dmpvr2_3rail' + Prefix: 'isl69269' Addresses scanned: - @@ -326,7 +326,7 @@ Supported chips: * Renesas ISL69298 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'isl69298' Addresses scanned: - @@ -336,7 +336,7 @@ Supported chips: * Renesas RAA228000 - Prefix: 'raa_dmpvr2_hv' + Prefix: 'raa228000' Addresses scanned: - @@ -346,7 +346,7 @@ Supported chips: * Renesas RAA228004 - Prefix: 'raa_dmpvr2_hv' + Prefix: 'raa228004' Addresses scanned: - @@ -356,7 +356,7 @@ Supported chips: * Renesas RAA228006 - Prefix: 'raa_dmpvr2_hv' + Prefix: 'raa228006' Addresses scanned: - @@ -366,7 +366,7 @@ Supported chips: * Renesas RAA228228 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'raa228228' Addresses scanned: - @@ -376,7 +376,7 @@ Supported chips: * Renesas RAA229001 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'raa229001' Addresses scanned: - @@ -386,7 +386,7 @@ Supported chips: * Renesas RAA229004 - Prefix: 'raa_dmpvr2_2rail' + Prefix: 'raa229004' Addresses scanned: - diff --git a/MAINTAINERS b/MAINTAINERS index c77f02282044..b816a453b10e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13857,7 +13857,8 @@ S: Maintained F: drivers/scsi/qla1280.[ch] QLOGIC QLA2XXX FC-SCSI DRIVER -M: hmadhani@marvell.com +M: Nilesh Javali <njavali@marvell.com> +M: GR-QLogic-Storage-Upstream@marvell.com L: linux-scsi@vger.kernel.org S: Supported F: Documentation/scsi/LICENSE.qla2xxx @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Kleptomaniac Octopus # *DOCUMENTATION* diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 6685e1218959..7063b5a43220 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index bf08d4508ecb..a19a680542ce 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1119,35 +1119,53 @@ void switch_to_sld(unsigned long tifn) sld_update_msr(!(tifn & _TIF_SLD)); } -#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY} - /* - * The following processors have the split lock detection feature. But - * since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot - * be enumerated. Enable it by family and model matching on these - * processors. + * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should + * only be trusted if it is confirmed that a CPU model implements a + * specific feature at a particular bit position. + * + * The possible driver data field values: + * + * - 0: CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. + * + * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use + * bit 5 to enumerate the per-core split-lock detection feature. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X), - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), {} }; void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) { - u64 ia32_core_caps = 0; + const struct x86_cpu_id *m; + u64 ia32_core_caps; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; - if (c->x86_vendor != X86_VENDOR_INTEL) + m = x86_match_cpu(split_lock_cpu_ids); + if (!m) return; - if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) { - /* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */ + + switch (m->driver_data) { + case 0: + break; + case 1: + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) + return; rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - } else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - /* Enumerate split lock detection by family and model. */ - if (x86_match_cpu(split_lock_cpu_ids)) - ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT; + if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) + return; + break; + default: + return; } - if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) - split_lock_setup(); + split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 89049b343c7a..d8cc5223b7ce 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 181c992f448c..3dd13f3a8b23 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -601,5 +601,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 064e9ef44cd6..5a359d9fcc05 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1859,6 +1859,19 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * Enable or disable the MBA software controller * which helps user specify bandwidth in MBps. @@ -3072,7 +3085,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = rdtgroup_ctrl_remove(kn, rdtgrp); diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 4d732a444711..8d5cbe1bbb3b 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -81,7 +81,7 @@ #define UMIP_INST_SLDT 3 /* 0F 00 /0 */ #define UMIP_INST_STR 4 /* 0F 00 /1 */ -const char * const umip_insns[5] = { +static const char * const umip_insns[5] = { [UMIP_INST_SGDT] = "SGDT", [UMIP_INST_SIDT] = "SIDT", [UMIP_INST_SMSW] = "SMSW", diff --git a/block/Kconfig b/block/Kconfig index 3bc76bb113a0..41cb34b0fcd1 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -146,6 +146,7 @@ config BLK_CGROUP_IOLATENCY config BLK_CGROUP_IOCOST bool "Enable support for cost model based cgroup IO controller" depends on BLK_CGROUP=y + select BLK_RQ_IO_DATA_LEN select BLK_RQ_ALLOC_TIME ---help--- Enabling this option enables the .weight interface for cost diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 930212c1a512..0ecc897b225c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1530,6 +1530,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) { u64 old = atomic64_read(&blkg->delay_start); + /* negative use_delay means no scaling, see blkcg_set_delay() */ + if (atomic_read(&blkg->use_delay) < 0) + return; + /* * We only want to scale down every second. The idea here is that we * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain @@ -1717,6 +1721,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) */ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta) { + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; blkcg_scale_delay(blkg, now); atomic64_add(delta, &blkg->delay_nsec); } diff --git a/block/blk-core.c b/block/blk-core.c index 7e4a1da0715e..7f11560bfddb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -440,6 +440,23 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) } } +static inline int bio_queue_enter(struct bio *bio) +{ + struct request_queue *q = bio->bi_disk->queue; + bool nowait = bio->bi_opf & REQ_NOWAIT; + int ret; + + ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0); + if (unlikely(ret)) { + if (nowait && !blk_queue_dying(q)) + bio_wouldblock_error(bio); + else + bio_io_error(bio); + } + + return ret; +} + void blk_queue_exit(struct request_queue *q) { percpu_ref_put(&q->q_usage_counter); @@ -963,12 +980,13 @@ generic_make_request_checks(struct bio *bio) } /* - * Various block parts want %current->io_context and lazy ioc - * allocation ends up trading a lot of pain for a small amount of - * memory. Just allocate it upfront. This may fail and block - * layer knows how to live with it. + * Various block parts want %current->io_context, so allocate it up + * front rather than dealing with lots of pain to allocate it only + * where needed. This may fail and the block layer knows how to live + * with it. */ - create_io_context(GFP_ATOMIC, q->node); + if (unlikely(!current->io_context)) + create_task_io_context(current, GFP_ATOMIC, q->node); if (!blkcg_bio_issue_check(q, bio)) return false; @@ -991,28 +1009,13 @@ end_io: } /** - * generic_make_request - hand a buffer to its device driver for I/O + * generic_make_request - re-submit a bio to the block device layer for I/O * @bio: The bio describing the location in memory and on the device. * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct bio, which describes the I/O that needs - * to be done. - * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bio->bi_end_io - * function described (one day) else where. - * - * The caller of generic_make_request must make sure that bi_io_vec - * are set to describe the memory buffer, and that bi_dev and bi_sector are - * set to describe the device address, and the - * bi_end_io and optionally bi_private are set to describe how - * completion notification should be signaled. - * - * generic_make_request and the drivers it calls may use bi_next if this - * bio happens to be merged with someone else, and may resubmit the bio to - * a lower device by calling into generic_make_request recursively, which - * means the bio should NOT be touched after the call to ->make_request_fn. + * This is a version of submit_bio() that shall only be used for I/O that is + * resubmitted to lower level drivers by stacking block drivers. All file + * systems and other upper level users of the block layer should use + * submit_bio() instead. */ blk_qc_t generic_make_request(struct bio *bio) { @@ -1063,16 +1066,17 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = bio_list_on_stack; do { struct request_queue *q = bio->bi_disk->queue; - blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ? - BLK_MQ_REQ_NOWAIT : 0; - if (likely(blk_queue_enter(q, flags) == 0)) { + if (likely(bio_queue_enter(bio) == 0)) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = q->make_request_fn(q, bio); + if (q->make_request_fn) + ret = q->make_request_fn(q, bio); + else + ret = blk_mq_make_request(q, bio); blk_queue_exit(q); @@ -1090,12 +1094,6 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_merge(&bio_list_on_stack[0], &lower); bio_list_merge(&bio_list_on_stack[0], &same); bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); - } else { - if (unlikely(!blk_queue_dying(q) && - (bio->bi_opf & REQ_NOWAIT))) - bio_wouldblock_error(bio); - else - bio_io_error(bio); } bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); @@ -1112,28 +1110,22 @@ EXPORT_SYMBOL(generic_make_request); * * This function behaves like generic_make_request(), but does not protect * against recursion. Must only be used if the called driver is known - * to not call generic_make_request (or direct_make_request) again from - * its make_request function. (Calling direct_make_request again from - * a workqueue is perfectly fine as that doesn't recurse). + * to be blk-mq based. */ blk_qc_t direct_make_request(struct bio *bio) { struct request_queue *q = bio->bi_disk->queue; - bool nowait = bio->bi_opf & REQ_NOWAIT; blk_qc_t ret; + if (WARN_ON_ONCE(q->make_request_fn)) { + bio_io_error(bio); + return BLK_QC_T_NONE; + } if (!generic_make_request_checks(bio)) return BLK_QC_T_NONE; - - if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { - if (nowait && !blk_queue_dying(q)) - bio_wouldblock_error(bio); - else - bio_io_error(bio); + if (unlikely(bio_queue_enter(bio))) return BLK_QC_T_NONE; - } - - ret = q->make_request_fn(q, bio); + ret = blk_mq_make_request(q, bio); blk_queue_exit(q); return ret; } @@ -1143,17 +1135,17 @@ EXPORT_SYMBOL_GPL(direct_make_request); * submit_bio - submit a bio to the block device layer for I/O * @bio: The &struct bio which describes the I/O * - * submit_bio() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. Both are fairly rough - * interfaces; @bio must be presetup and ready for I/O. + * submit_bio() is used to submit I/O requests to block devices. It is passed a + * fully set up &struct bio that describes the I/O that needs to be done. The + * bio will be send to the device described by the bi_disk and bi_partno fields. * + * The success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the ->bi_end_io() callback + * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has + * been called. */ blk_qc_t submit_bio(struct bio *bio) { - bool workingset_read = false; - unsigned long pflags; - blk_qc_t ret; - if (blkcg_punt_bio_submit(bio)) return BLK_QC_T_NONE; @@ -1172,8 +1164,6 @@ blk_qc_t submit_bio(struct bio *bio) if (op_is_write(bio_op(bio))) { count_vm_events(PGPGOUT, count); } else { - if (bio_flagged(bio, BIO_WORKINGSET)) - workingset_read = true; task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, count); } @@ -1189,20 +1179,24 @@ blk_qc_t submit_bio(struct bio *bio) } /* - * If we're reading data that is part of the userspace - * workingset, count submission time as memory stall. When the - * device is congested, or the submitting cgroup IO-throttled, - * submission can be a significant part of overall IO time. + * If we're reading data that is part of the userspace workingset, count + * submission time as memory stall. When the device is congested, or + * the submitting cgroup IO-throttled, submission can be a significant + * part of overall IO time. */ - if (workingset_read) - psi_memstall_enter(&pflags); - - ret = generic_make_request(bio); + if (unlikely(bio_op(bio) == REQ_OP_READ && + bio_flagged(bio, BIO_WORKINGSET))) { + unsigned long pflags; + blk_qc_t ret; - if (workingset_read) + psi_memstall_enter(&pflags); + ret = generic_make_request(bio); psi_memstall_leave(&pflags); - return ret; + return ret; + } + + return generic_make_request(bio); } EXPORT_SYMBOL(submit_bio); @@ -1638,7 +1632,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, } rq->nr_phys_segments = rq_src->nr_phys_segments; rq->ioprio = rq_src->ioprio; - rq->extra_len = rq_src->extra_len; return 0; diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7c1fe605d0d6..12eefb3d113f 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -260,6 +260,7 @@ enum { VTIME_PER_SEC_SHIFT = 37, VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT, VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC, + VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC, /* bound vrate adjustments within two orders of magnitude */ VRATE_MIN_PPM = 10000, /* 1% */ @@ -1206,14 +1207,14 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) +static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); u64 vtime = atomic64_read(&iocg->vtime); u64 vmargin = ioc->margin_us * now->vrate; u64 margin_ns = ioc->margin_us * NSEC_PER_USEC; - u64 expires, oexpires; + u64 delta_ns, expires, oexpires; u32 hw_inuse; lockdep_assert_held(&iocg->waitq.lock); @@ -1236,15 +1237,10 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) return false; /* use delay */ - if (cost) { - u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC, - now->vrate); - blkcg_add_delay(blkg, now->now_ns, cost_ns); - } - blkcg_use_delay(blkg); - - expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow, - now->vrate) * NSEC_PER_USEC; + delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow, + now->vrate) * NSEC_PER_USEC; + blkcg_set_delay(blkg, delta_ns); + expires = now->now_ns + delta_ns; /* if already active and close enough, don't bother */ oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer)); @@ -1265,7 +1261,7 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) spin_lock_irqsave(&iocg->waitq.lock, flags); ioc_now(iocg->ioc, &now); - iocg_kick_delay(iocg, &now, 0); + iocg_kick_delay(iocg, &now); spin_unlock_irqrestore(&iocg->waitq.lock, flags); return HRTIMER_NORESTART; @@ -1383,7 +1379,7 @@ static void ioc_timer_fn(struct timer_list *timer) if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { /* might be oversleeping vtime / hweight changes, kick */ iocg_kick_waitq(iocg, &now); - iocg_kick_delay(iocg, &now, 0); + iocg_kick_delay(iocg, &now); } else if (iocg_is_idle(iocg)) { /* no waiter and idle, deactivate */ iocg->last_inuse = iocg->inuse; @@ -1678,6 +1674,31 @@ static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge) return cost; } +static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc, + u64 *costp) +{ + unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT; + + switch (req_op(rq)) { + case REQ_OP_READ: + *costp = pages * ioc->params.lcoefs[LCOEF_RPAGE]; + break; + case REQ_OP_WRITE: + *costp = pages * ioc->params.lcoefs[LCOEF_WPAGE]; + break; + default: + *costp = 0; + } +} + +static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc) +{ + u64 cost; + + calc_size_vtime_cost_builtin(rq, ioc, &cost); + return cost; +} + static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; @@ -1762,7 +1783,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) */ if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { iocg->abs_vdebt += abs_cost; - if (iocg_kick_delay(iocg, &now, cost)) + if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->q, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); spin_unlock_irq(&iocg->waitq.lock); @@ -1850,7 +1871,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, spin_lock_irqsave(&iocg->waitq.lock, flags); if (likely(!list_empty(&iocg->active_list))) { iocg->abs_vdebt += abs_cost; - iocg_kick_delay(iocg, &now, cost); + iocg_kick_delay(iocg, &now); } else { iocg_commit_bio(iocg, bio, cost); } @@ -1868,7 +1889,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) { struct ioc *ioc = rqos_to_ioc(rqos); - u64 on_q_ns, rq_wait_ns; + u64 on_q_ns, rq_wait_ns, size_nsec; int pidx, rw; if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) @@ -1889,8 +1910,10 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) on_q_ns = ktime_get_ns() - rq->alloc_time_ns; rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns; + size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC); - if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC) + if (on_q_ns <= size_nsec || + on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC) this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met); else this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed); @@ -2297,6 +2320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, spin_lock_irq(&ioc->lock); if (enable) { + blk_stat_enable_accounting(ioc->rqos.q); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q); ioc->enabled = true; } else { diff --git a/block/blk-map.c b/block/blk-map.c index b72c361911a4..b6fa343fea9f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -654,8 +654,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, bio = rq->bio; } while (iov_iter_count(&i)); - if (!bio_flagged(bio, BIO_USER_MAPPED)) - rq->rq_flags |= RQF_COPY_USER; return 0; unmap_rq: @@ -731,7 +729,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, { int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; - int do_copy = 0; struct bio *bio, *orig_bio; int ret; @@ -740,8 +737,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf); - if (do_copy) + if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf)) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else bio = bio_map_kern(q, kbuf, len, gfp_mask); @@ -752,9 +748,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); - if (do_copy) - rq->rq_flags |= RQF_COPY_USER; - orig_bio = bio; ret = blk_rq_append_bio(rq, &bio); if (unlikely(ret)) { diff --git a/block/blk-merge.c b/block/blk-merge.c index 1534ed736363..a04e991b5ded 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -336,16 +336,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; - /* - * Since we're recursing into make_request here, ensure - * that we mark this bio as already having entered the queue. - * If not, and the queue is going away, we can get stuck - * forever on waiting for the queue reference to drop. But - * that will never happen, as we're already holding a - * reference to it. - */ - bio_set_flag(*bio, BIO_QUEUE_ENTERED); - bio_chain(split, *bio); trace_block_split(q, split, (*bio)->bi_iter.bi_sector); generic_make_request(*bio); @@ -519,44 +509,20 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, * map a request to scatterlist, return number of sg entries setup. Caller * must make sure sg can hold rq->nr_phys_segments entries */ -int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg) { - struct scatterlist *sg = NULL; int nsegs = 0; if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, &sg); + nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg); else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME) - nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, &sg); + nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, last_sg); else if (rq->bio) - nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); - - if (unlikely(rq->rq_flags & RQF_COPY_USER) && - (blk_rq_bytes(rq) & q->dma_pad_mask)) { - unsigned int pad_len = - (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; - - sg->length += pad_len; - rq->extra_len += pad_len; - } - - if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (op_is_write(req_op(rq))) - memset(q->dma_drain_buffer, 0, q->dma_drain_size); - - sg_unmark_end(sg); - sg = sg_next(sg); - sg_set_page(sg, virt_to_page(q->dma_drain_buffer), - q->dma_drain_size, - ((unsigned long)q->dma_drain_buffer) & - (PAGE_SIZE - 1)); - nsegs++; - rq->extra_len += q->dma_drain_size; - } + nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg); - if (sg) - sg_mark_end(sg); + if (*last_sg) + sg_mark_end(*last_sg); /* * Something must have been wrong if the figured number of @@ -566,7 +532,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, return nsegs; } -EXPORT_SYMBOL(blk_rq_map_sg); +EXPORT_SYMBOL(__blk_rq_map_sg); static inline int ll_new_hw_segment(struct request *req, struct bio *bio, unsigned int nr_phys_segs) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b3f2ba483992..96b7a35c898a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -292,7 +292,6 @@ static const char *const rqf_name[] = { RQF_NAME(MQ_INFLIGHT), RQF_NAME(DONTPREP), RQF_NAME(PREEMPT), - RQF_NAME(COPY_USER), RQF_NAME(FAILED), RQF_NAME(QUIET), RQF_NAME(ELVPRIV), diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 74cedea56034..fdcc2c1dd178 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -80,16 +80,22 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) blk_mq_run_hw_queue(hctx, true); } +#define BLK_MQ_BUDGET_DELAY 3 /* ms units */ + /* * Only SCSI implements .get_budget and .put_budget, and SCSI restarts * its queue by itself in its completion handler, so we don't need to * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE. + * + * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to + * be run again. This is necessary to avoid starving flushes. */ -static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) +static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct elevator_queue *e = q->elevator; LIST_HEAD(rq_list); + int ret = 0; do { struct request *rq; @@ -97,12 +103,25 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) break; + if (!list_empty_careful(&hctx->dispatch)) { + ret = -EAGAIN; + break; + } + if (!blk_mq_get_dispatch_budget(hctx)) break; rq = e->type->ops.dispatch_request(hctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); + /* + * We're releasing without dispatching. Holding the + * budget could have blocked any "hctx"s with the + * same queue and if we didn't dispatch then there's + * no guarantee anyone will kick the queue. Kick it + * ourselves. + */ + blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY); break; } @@ -113,6 +132,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) */ list_add(&rq->queuelist, &rq_list); } while (blk_mq_dispatch_rq_list(q, &rq_list, true)); + + return ret; } static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx, @@ -130,16 +151,25 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx, * Only SCSI implements .get_budget and .put_budget, and SCSI restarts * its queue by itself in its completion handler, so we don't need to * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE. + * + * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to + * to be run again. This is necessary to avoid starving flushes. */ -static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) +static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; LIST_HEAD(rq_list); struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from); + int ret = 0; do { struct request *rq; + if (!list_empty_careful(&hctx->dispatch)) { + ret = -EAGAIN; + break; + } + if (!sbitmap_any_bit_set(&hctx->ctx_map)) break; @@ -149,6 +179,14 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) rq = blk_mq_dequeue_from_ctx(hctx, ctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); + /* + * We're releasing without dispatching. Holding the + * budget could have blocked any "hctx"s with the + * same queue and if we didn't dispatch then there's + * no guarantee anyone will kick the queue. Kick it + * ourselves. + */ + blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY); break; } @@ -165,21 +203,17 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) } while (blk_mq_dispatch_rq_list(q, &rq_list, true)); WRITE_ONCE(hctx->dispatch_from, ctx); + return ret; } -void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) +static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct elevator_queue *e = q->elevator; const bool has_sched_dispatch = e && e->type->ops.dispatch_request; + int ret = 0; LIST_HEAD(rq_list); - /* RCU or SRCU read lock is needed before checking quiesced flag */ - if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) - return; - - hctx->run++; - /* * If we have previous entries on our dispatch list, grab them first for * more fair dispatch. @@ -208,19 +242,41 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) blk_mq_sched_mark_restart_hctx(hctx); if (blk_mq_dispatch_rq_list(q, &rq_list, false)) { if (has_sched_dispatch) - blk_mq_do_dispatch_sched(hctx); + ret = blk_mq_do_dispatch_sched(hctx); else - blk_mq_do_dispatch_ctx(hctx); + ret = blk_mq_do_dispatch_ctx(hctx); } } else if (has_sched_dispatch) { - blk_mq_do_dispatch_sched(hctx); + ret = blk_mq_do_dispatch_sched(hctx); } else if (hctx->dispatch_busy) { /* dequeue request one by one from sw queue if queue is busy */ - blk_mq_do_dispatch_ctx(hctx); + ret = blk_mq_do_dispatch_ctx(hctx); } else { blk_mq_flush_busy_ctxs(hctx, &rq_list); blk_mq_dispatch_rq_list(q, &rq_list, false); } + + return ret; +} + +void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; + + /* RCU or SRCU read lock is needed before checking quiesced flag */ + if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) + return; + + hctx->run++; + + /* + * A return of -EAGAIN is an indication that hctx->dispatch is not + * empty and we must run again in order to avoid starving flushes. + */ + if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) { + if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) + blk_mq_run_hw_queue(hctx, true); + } } bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, diff --git a/block/blk-mq.c b/block/blk-mq.c index a7785df2c944..bcc3a2397d4a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -318,7 +318,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->nr_integrity_segments = 0; #endif /* tag was already set */ - rq->extra_len = 0; WRITE_ONCE(rq->deadline, 0); rq->timeout = 0; @@ -667,15 +666,6 @@ void blk_mq_start_request(struct request *rq) blk_add_timer(rq); WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); - if (q->dma_drain_size && blk_rq_bytes(rq)) { - /* - * Make sure space for the drain appears. We know we can do - * this because max_hw_segments has been adjusted to be one - * fewer than the device can handle. - */ - rq->nr_phys_segments++; - } - #ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) q->integrity.profile->prepare_fn(rq); @@ -695,8 +685,6 @@ static void __blk_mq_requeue_request(struct request *rq) if (blk_mq_request_started(rq)) { WRITE_ONCE(rq->state, MQ_RQ_IDLE); rq->rq_flags &= ~RQF_TIMED_OUT; - if (q->dma_drain_size && blk_rq_bytes(rq)) - rq->nr_phys_segments--; } } @@ -1206,6 +1194,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, bool no_tag = false; int errors, queued; blk_status_t ret = BLK_STS_OK; + bool no_budget_avail = false; if (list_empty(list)) return false; @@ -1224,6 +1213,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, hctx = rq->mq_hctx; if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) { blk_mq_put_driver_tag(rq); + no_budget_avail = true; break; } @@ -1320,13 +1310,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls - * that could otherwise occur if the queue is idle. + * that could otherwise occur if the queue is idle. We'll do + * similar if we couldn't get budget and SCHED_RESTART is set. */ needs_restart = blk_mq_sched_needs_restart(hctx); if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); - else if (needs_restart && (ret == BLK_STS_RESOURCE)) + else if (needs_restart && (ret == BLK_STS_RESOURCE || + no_budget_avail)) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); @@ -1542,6 +1534,25 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) EXPORT_SYMBOL(blk_mq_run_hw_queues); /** + * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. + * @q: Pointer to the request queue to run. + * @msecs: Microseconds of delay to wait before running the queues. + */ +void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_hctx_stopped(hctx)) + continue; + + blk_mq_delay_run_hw_queue(hctx, msecs); + } +} +EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); + +/** * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped * @q: request queue. * @@ -1973,7 +1984,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) * * Returns: Request queue cookie. */ -static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = op_is_flush(bio->bi_opf); @@ -2085,6 +2096,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) return cookie; } +EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) @@ -2944,7 +2956,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&q->requeue_list); spin_lock_init(&q->requeue_lock); - q->make_request_fn = blk_mq_make_request; q->nr_requests = set->queue_depth; /* diff --git a/block/blk-settings.c b/block/blk-settings.c index 14397b4c4b53..2ab1967b9716 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -652,43 +652,6 @@ void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) EXPORT_SYMBOL(blk_queue_update_dma_pad); /** - * blk_queue_dma_drain - Set up a drain buffer for excess dma. - * @q: the request queue for the device - * @dma_drain_needed: fn which returns non-zero if drain is necessary - * @buf: physically contiguous buffer - * @size: size of the buffer in bytes - * - * Some devices have excess DMA problems and can't simply discard (or - * zero fill) the unwanted piece of the transfer. They have to have a - * real area of memory to transfer it into. The use case for this is - * ATAPI devices in DMA mode. If the packet command causes a transfer - * bigger than the transfer size some HBAs will lock up if there - * aren't DMA elements to contain the excess transfer. What this API - * does is adjust the queue so that the buf is always appended - * silently to the scatterlist. - * - * Note: This routine adjusts max_hw_segments to make room for appending - * the drain buffer. If you call blk_queue_max_segments() after calling - * this routine, you must set the limit to one fewer than your device - * can support otherwise there won't be room for the drain buffer. - */ -int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size) -{ - if (queue_max_segments(q) < 2) - return -EINVAL; - /* make room for appending the drain */ - blk_queue_max_segments(q, queue_max_segments(q) - 1); - q->dma_drain_needed = dma_drain_needed; - q->dma_drain_buffer = buf; - q->dma_drain_size = size; - - return 0; -} -EXPORT_SYMBOL_GPL(blk_queue_dma_drain); - -/** * blk_queue_segment_boundary - set boundary rules for segment merging * @q: the request queue for the device * @mask: the memory boundary mask diff --git a/block/blk.h b/block/blk.h index 0a94ec68af32..73bd3b1c6938 100644 --- a/block/blk.h +++ b/block/blk.h @@ -303,26 +303,6 @@ void ioc_clear_queue(struct request_queue *q); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); -/** - * create_io_context - try to create task->io_context - * @gfp_mask: allocation mask - * @node: allocation node - * - * If %current->io_context is %NULL, allocate a new io_context and install - * it. Returns the current %current->io_context which may be %NULL if - * allocation failed. - * - * Note that this function can't be called with IRQ disabled because - * task_lock which protects %current->io_context is IRQ-unsafe. - */ -static inline struct io_context *create_io_context(gfp_t gfp_mask, int node) -{ - WARN_ON_ONCE(irqs_disabled()); - if (unlikely(!current->io_context)) - create_task_io_context(current, gfp_mask, node); - return current->io_context; -} - /* * Internal throttling interface */ @@ -389,20 +369,14 @@ char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno, - sector_t start, sector_t len, int flags, - struct partition_meta_info *info); -void __delete_partition(struct percpu_ref *ref); -void delete_partition(struct gendisk *disk, int partno); +void delete_partition(struct gendisk *disk, struct hd_struct *part); +int bdev_add_partition(struct block_device *bdev, int partno, + sector_t start, sector_t length); +int bdev_del_partition(struct block_device *bdev, int partno); +int bdev_resize_partition(struct block_device *bdev, int partno, + sector_t start, sector_t length); int disk_expand_part_tbl(struct gendisk *disk, int target); - -static inline int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, __delete_partition, 0, - GFP_KERNEL)) - return -ENOMEM; - return 0; -} +int hd_ref_init(struct hd_struct *part); static inline void hd_struct_get(struct hd_struct *part) { @@ -419,11 +393,6 @@ static inline void hd_struct_put(struct hd_struct *part) percpu_ref_put(&part->ref); } -static inline void hd_struct_kill(struct hd_struct *part) -{ - percpu_ref_kill(&part->ref); -} - static inline void hd_free_part(struct hd_struct *part) { free_part_stats(part); diff --git a/block/genhd.c b/block/genhd.c index 06b642b23a07..c05d509877fa 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -878,6 +878,25 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) } EXPORT_SYMBOL(device_add_disk_no_queue_reg); +static void invalidate_partition(struct gendisk *disk, int partno) +{ + struct block_device *bdev; + + bdev = bdget_disk(disk, partno); + if (!bdev) + return; + + fsync_bdev(bdev); + __invalidate_device(bdev, true); + + /* + * Unhash the bdev inode for this device so that it gets evicted as soon + * as last inode reference is dropped. + */ + remove_inode_hash(bdev->bd_inode); + bdput(bdev); +} + void del_gendisk(struct gendisk *disk) { struct disk_part_iter piter; @@ -896,13 +915,11 @@ void del_gendisk(struct gendisk *disk) DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { invalidate_partition(disk, part->partno); - bdev_unhash_inode(part_devt(part)); - delete_partition(disk, part->partno); + delete_partition(disk, part); } disk_part_iter_exit(&piter); invalidate_partition(disk, 0); - bdev_unhash_inode(disk_devt(disk)); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; up_write(&disk->lookup_sem); @@ -1806,20 +1823,6 @@ int bdev_read_only(struct block_device *bdev) EXPORT_SYMBOL(bdev_read_only); -int invalidate_partition(struct gendisk *disk, int partno) -{ - int res = 0; - struct block_device *bdev = bdget_disk(disk, partno); - if (bdev) { - fsync_bdev(bdev); - res = __invalidate_device(bdev, true); - bdput(bdev); - } - return res; -} - -EXPORT_SYMBOL(invalidate_partition); - /* * Disk events - monitor disk events like media change and eject request. */ diff --git a/block/ioctl.c b/block/ioctl.c index 6e827de1a4c4..75c64811b534 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -16,142 +16,44 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition __user *upart, int op) { - struct block_device *bdevp; - struct gendisk *disk; - struct hd_struct *part, *lpart; struct blkpg_partition p; - struct disk_part_iter piter; long long start, length; - int partno; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; - disk = bdev->bd_disk; if (bdev != bdev->bd_contains) return -EINVAL; - partno = p.pno; - if (partno <= 0) + + if (p.pno <= 0) return -EINVAL; + + if (op == BLKPG_DEL_PARTITION) + return bdev_del_partition(bdev, p.pno); + + start = p.start >> SECTOR_SHIFT; + length = p.length >> SECTOR_SHIFT; + + /* check for fit in a hd_struct */ + if (sizeof(sector_t) < sizeof(long long)) { + long pstart = start, plength = length; + + if (pstart != start || plength != length || pstart < 0 || + plength < 0 || p.pno > 65535) + return -EINVAL; + } + switch (op) { - case BLKPG_ADD_PARTITION: - start = p.start >> 9; - length = p.length >> 9; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) == sizeof(long) && - sizeof(long long) > sizeof(long)) { - long pstart = start, plength = length; - if (pstart != start || plength != length - || pstart < 0 || plength < 0 || partno > 65535) - return -EINVAL; - } - /* check if partition is aligned to blocksize */ - if (p.start & (bdev_logical_block_size(bdev) - 1)) - return -EINVAL; - - mutex_lock(&bdev->bd_mutex); - - /* overlap? */ - disk_part_iter_init(&piter, disk, - DISK_PITER_INCL_EMPTY); - while ((part = disk_part_iter_next(&piter))) { - if (!(start + length <= part->start_sect || - start >= part->start_sect + part->nr_sects)) { - disk_part_iter_exit(&piter); - mutex_unlock(&bdev->bd_mutex); - return -EBUSY; - } - } - disk_part_iter_exit(&piter); - - /* all seems OK */ - part = add_partition(disk, partno, start, length, - ADDPART_FLAG_NONE, NULL); - mutex_unlock(&bdev->bd_mutex); - return PTR_ERR_OR_ZERO(part); - case BLKPG_DEL_PARTITION: - part = disk_get_part(disk, partno); - if (!part) - return -ENXIO; - - bdevp = bdget(part_devt(part)); - disk_put_part(part); - if (!bdevp) - return -ENOMEM; - - mutex_lock(&bdevp->bd_mutex); - if (bdevp->bd_openers) { - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - return -EBUSY; - } - /* all seems OK */ - fsync_bdev(bdevp); - invalidate_bdev(bdevp); - - mutex_lock_nested(&bdev->bd_mutex, 1); - delete_partition(disk, partno); - mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - - return 0; - case BLKPG_RESIZE_PARTITION: - start = p.start >> 9; - /* new length of partition in bytes */ - length = p.length >> 9; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) == sizeof(long) && - sizeof(long long) > sizeof(long)) { - long pstart = start, plength = length; - if (pstart != start || plength != length - || pstart < 0 || plength < 0) - return -EINVAL; - } - part = disk_get_part(disk, partno); - if (!part) - return -ENXIO; - bdevp = bdget(part_devt(part)); - if (!bdevp) { - disk_put_part(part); - return -ENOMEM; - } - mutex_lock(&bdevp->bd_mutex); - mutex_lock_nested(&bdev->bd_mutex, 1); - if (start != part->start_sect) { - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return -EINVAL; - } - /* overlap? */ - disk_part_iter_init(&piter, disk, - DISK_PITER_INCL_EMPTY); - while ((lpart = disk_part_iter_next(&piter))) { - if (lpart->partno != partno && - !(start + length <= lpart->start_sect || - start >= lpart->start_sect + lpart->nr_sects) - ) { - disk_part_iter_exit(&piter); - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return -EBUSY; - } - } - disk_part_iter_exit(&piter); - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, p.length); - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return 0; - default: + case BLKPG_ADD_PARTITION: + /* check if partition is aligned to blocksize */ + if (p.start & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + return bdev_add_partition(bdev, p.pno, start, length); + case BLKPG_RESIZE_PARTITION: + return bdev_resize_partition(bdev, p.pno, start, length); + default: + return -EINVAL; } } diff --git a/block/partitions/core.c b/block/partitions/core.c index 9ef48a8cff86..873999e2e2f2 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -274,10 +274,10 @@ struct device_type part_type = { .uevent = part_uevent, }; -static void delete_partition_work_fn(struct work_struct *work) +static void hd_struct_free_work(struct work_struct *work) { - struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct, - rcu_work); + struct hd_struct *part = + container_of(to_rcu_work(work), struct hd_struct, rcu_work); part->start_sect = 0; part->nr_sects = 0; @@ -285,31 +285,31 @@ static void delete_partition_work_fn(struct work_struct *work) put_device(part_to_dev(part)); } -void __delete_partition(struct percpu_ref *ref) +static void hd_struct_free(struct percpu_ref *ref) { struct hd_struct *part = container_of(ref, struct hd_struct, ref); - INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn); + + INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); queue_rcu_work(system_wq, &part->rcu_work); } +int hd_ref_init(struct hd_struct *part) +{ + if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL)) + return -ENOMEM; + return 0; +} + /* * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -void delete_partition(struct gendisk *disk, int partno) +void delete_partition(struct gendisk *disk, struct hd_struct *part) { struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - struct hd_struct *part; - if (partno >= ptbl->len) - return; - - part = rcu_dereference_protected(ptbl->part[partno], 1); - if (!part) - return; - - rcu_assign_pointer(ptbl->part[partno], NULL); + rcu_assign_pointer(ptbl->part[part->partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); @@ -321,7 +321,7 @@ void delete_partition(struct gendisk *disk, int partno) * "in-use" until we really free the gendisk. */ blk_invalidate_devt(part_devt(part)); - hd_struct_kill(part); + percpu_ref_kill(&part->ref); } static ssize_t whole_disk_show(struct device *dev, @@ -335,7 +335,7 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -struct hd_struct *add_partition(struct gendisk *disk, int partno, +static struct hd_struct *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) { @@ -472,6 +472,121 @@ out_put: return ERR_PTR(err); } +static bool partition_overlaps(struct gendisk *disk, sector_t start, + sector_t length, int skip_partno) +{ + struct disk_part_iter piter; + struct hd_struct *part; + bool overlap = false; + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) { + if (part->partno == skip_partno || + start >= part->start_sect + part->nr_sects || + start + length <= part->start_sect) + continue; + overlap = true; + break; + } + + disk_part_iter_exit(&piter); + return overlap; +} + +int bdev_add_partition(struct block_device *bdev, int partno, + sector_t start, sector_t length) +{ + struct hd_struct *part; + + mutex_lock(&bdev->bd_mutex); + if (partition_overlaps(bdev->bd_disk, start, length, -1)) { + mutex_unlock(&bdev->bd_mutex); + return -EBUSY; + } + + part = add_partition(bdev->bd_disk, partno, start, length, + ADDPART_FLAG_NONE, NULL); + mutex_unlock(&bdev->bd_mutex); + return PTR_ERR_OR_ZERO(part); +} + +int bdev_del_partition(struct block_device *bdev, int partno) +{ + struct block_device *bdevp; + struct hd_struct *part; + int ret = 0; + + part = disk_get_part(bdev->bd_disk, partno); + if (!part) + return -ENXIO; + + ret = -ENOMEM; + bdevp = bdget(part_devt(part)); + if (!bdevp) + goto out_put_part; + + mutex_lock(&bdevp->bd_mutex); + + ret = -EBUSY; + if (bdevp->bd_openers) + goto out_unlock; + + sync_blockdev(bdevp); + invalidate_bdev(bdevp); + + mutex_lock_nested(&bdev->bd_mutex, 1); + delete_partition(bdev->bd_disk, part); + mutex_unlock(&bdev->bd_mutex); + + ret = 0; +out_unlock: + mutex_unlock(&bdevp->bd_mutex); + bdput(bdevp); +out_put_part: + disk_put_part(part); + return ret; +} + +int bdev_resize_partition(struct block_device *bdev, int partno, + sector_t start, sector_t length) +{ + struct block_device *bdevp; + struct hd_struct *part; + int ret = 0; + + part = disk_get_part(bdev->bd_disk, partno); + if (!part) + return -ENXIO; + + ret = -ENOMEM; + bdevp = bdget(part_devt(part)); + if (!bdevp) + goto out_put_part; + + mutex_lock(&bdevp->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, 1); + + ret = -EINVAL; + if (start != part->start_sect) + goto out_unlock; + + ret = -EBUSY; + if (partition_overlaps(bdev->bd_disk, start, length, partno)) + goto out_unlock; + + part_nr_sects_write(part, (sector_t)length); + i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); + + ret = 0; +out_unlock: + mutex_unlock(&bdevp->bd_mutex); + mutex_unlock(&bdev->bd_mutex); + bdput(bdevp); +out_put_part: + disk_put_part(part); + return ret; +} + static bool disk_unlock_native_capacity(struct gendisk *disk) { const struct block_device_operations *bdops = disk->fops; @@ -488,27 +603,30 @@ static bool disk_unlock_native_capacity(struct gendisk *disk) } } -int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev) +int blk_drop_partitions(struct block_device *bdev) { struct disk_part_iter piter; struct hd_struct *part; - int res; - if (!disk_part_scan_enabled(disk)) + if (!disk_part_scan_enabled(bdev->bd_disk)) return 0; if (bdev->bd_part_count) return -EBUSY; - res = invalidate_partition(disk, 0); - if (res) - return res; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + sync_blockdev(bdev); + invalidate_bdev(bdev); + + disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - delete_partition(disk, part->partno); + delete_partition(bdev->bd_disk, part); disk_part_iter_exit(&piter); return 0; } +#ifdef CONFIG_S390 +/* for historic reasons in the DASD driver */ +EXPORT_SYMBOL_GPL(blk_drop_partitions); +#endif static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, struct parsed_partitions *state, int p) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 36e588d88b95..435781a16875 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -649,7 +649,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; - qc->extrabytes = scmd->request->extra_len; + qc->extrabytes = scmd->extra_len; qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes; } @@ -1017,16 +1017,11 @@ void ata_scsi_sdev_config(struct scsi_device *sdev) * RETURNS: * 1 if ; otherwise, 0. */ -static int atapi_drain_needed(struct request *rq) +bool ata_scsi_dma_need_drain(struct request *rq) { - if (likely(!blk_rq_is_passthrough(rq))) - return 0; - - if (!blk_rq_bytes(rq) || op_is_write(req_op(rq))) - return 0; - return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC; } +EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain); int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) { @@ -1039,21 +1034,21 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) blk_queue_max_hw_sectors(q, dev->max_sectors); if (dev->class == ATA_DEV_ATAPI) { - void *buf; - sdev->sector_size = ATA_SECT_SIZE; /* set DMA padding */ blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1); - /* configure draining */ - buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); - if (!buf) { + /* make room for appending the drain */ + blk_queue_max_segments(q, queue_max_segments(q) - 1); + + sdev->dma_drain_len = ATAPI_MAX_DRAIN; + sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len, + q->bounce_gfp | GFP_KERNEL); + if (!sdev->dma_drain_buf) { ata_dev_err(dev, "drain buffer allocation failed\n"); return -ENOMEM; } - - blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); sdev->manage_start_stop = 1; @@ -1135,7 +1130,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_slave_config); void ata_scsi_slave_destroy(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); - struct request_queue *q = sdev->request_queue; unsigned long flags; struct ata_device *dev; @@ -1152,9 +1146,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) } spin_unlock_irqrestore(ap->lock, flags); - kfree(q->dma_drain_buffer); - q->dma_drain_buffer = NULL; - q->dma_drain_size = 0; + kfree(sdev->dma_drain_buf); } EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index cda5cf917e9a..5124eca90e83 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -1032,7 +1032,7 @@ static int __init pcd_init(void) for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { if (cd->present) { - register_cdrom(&cd->info); + register_cdrom(cd->disk, &cd->info); cd->disk->private_data = cd; add_disk(cd->disk); } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index faca0f346fff..06896c07b133 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -586,7 +586,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space) return 0; } -int register_cdrom(struct cdrom_device_info *cdi) +int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi) { static char banner_printed; const struct cdrom_device_ops *cdo = cdi->ops; @@ -601,6 +601,9 @@ int register_cdrom(struct cdrom_device_info *cdi) cdrom_sysctl_register(); } + cdi->disk = disk; + disk->cdi = cdi; + ENSURE(cdo, drive_status, CDC_DRIVE_STATUS); if (cdo->check_events == NULL && cdo->media_changed == NULL) WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC)); @@ -2292,37 +2295,46 @@ retry: return cdrom_read_cdda_old(cdi, ubuf, lba, nframes); } -static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi, - void __user *argp) +int cdrom_multisession(struct cdrom_device_info *cdi, + struct cdrom_multisession *info) { - struct cdrom_multisession ms_info; u8 requested_format; int ret; - cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n"); - if (!(cdi->ops->capability & CDC_MULTI_SESSION)) return -ENOSYS; - if (copy_from_user(&ms_info, argp, sizeof(ms_info))) - return -EFAULT; - - requested_format = ms_info.addr_format; + requested_format = info->addr_format; if (requested_format != CDROM_MSF && requested_format != CDROM_LBA) return -EINVAL; - ms_info.addr_format = CDROM_LBA; + info->addr_format = CDROM_LBA; - ret = cdi->ops->get_last_session(cdi, &ms_info); - if (ret) - return ret; + ret = cdi->ops->get_last_session(cdi, info); + if (!ret) + sanitize_format(&info->addr, &info->addr_format, + requested_format); + return ret; +} +EXPORT_SYMBOL_GPL(cdrom_multisession); - sanitize_format(&ms_info.addr, &ms_info.addr_format, requested_format); +static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi, + void __user *argp) +{ + struct cdrom_multisession info; + int ret; + + cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n"); - if (copy_to_user(argp, &ms_info, sizeof(ms_info))) + if (copy_from_user(&info, argp, sizeof(info))) + return -EFAULT; + ret = cdrom_multisession(cdi, &info); + if (ret) + return ret; + if (copy_to_user(argp, &info, sizeof(info))) return -EFAULT; cd_dbg(CD_DO_IOCTL, "CDROMMULTISESSION successful\n"); - return 0; + return ret; } static int cdrom_ioctl_eject(struct cdrom_device_info *cdi) @@ -2663,32 +2675,37 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi, return 0; } +int cdrom_read_tocentry(struct cdrom_device_info *cdi, + struct cdrom_tocentry *entry) +{ + u8 requested_format = entry->cdte_format; + int ret; + + if (requested_format != CDROM_MSF && requested_format != CDROM_LBA) + return -EINVAL; + + /* make interface to low-level uniform */ + entry->cdte_format = CDROM_MSF; + ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, entry); + if (!ret) + sanitize_format(&entry->cdte_addr, &entry->cdte_format, + requested_format); + return ret; +} +EXPORT_SYMBOL_GPL(cdrom_read_tocentry); + static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi, void __user *argp) { struct cdrom_tocentry entry; - u8 requested_format; int ret; - /* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */ - if (copy_from_user(&entry, argp, sizeof(entry))) return -EFAULT; - - requested_format = entry.cdte_format; - if (requested_format != CDROM_MSF && requested_format != CDROM_LBA) - return -EINVAL; - /* make interface to low-level uniform */ - entry.cdte_format = CDROM_MSF; - ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry); - if (ret) - return ret; - sanitize_format(&entry.cdte_addr, &entry.cdte_format, requested_format); - - if (copy_to_user(argp, &entry, sizeof(entry))) + ret = cdrom_read_tocentry(cdi, &entry); + if (!ret && copy_to_user(argp, &entry, sizeof(entry))) return -EFAULT; - /* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */ - return 0; + return ret; } static int cdrom_ioctl_play_msf(struct cdrom_device_info *cdi, diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index c51292c2a131..09b0cd292720 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -770,7 +770,7 @@ static int probe_gdrom(struct platform_device *devptr) goto probe_fail_no_disk; } probe_gdrom_setupdisk(); - if (register_cdrom(gd.cd_info)) { + if (register_cdrom(gd.disk, gd.cd_info)) { err = -ENODEV; goto probe_fail_cdrom_register; } diff --git a/drivers/clk/clk-asm9260.c b/drivers/clk/clk-asm9260.c index 536b59aabd2c..bacebd457e6f 100644 --- a/drivers/clk/clk-asm9260.c +++ b/drivers/clk/clk-asm9260.c @@ -276,7 +276,7 @@ static void __init asm9260_acc_init(struct device_node *np) /* TODO: Convert to DT parent scheme */ ref_clk = of_clk_get_parent_name(np, 0); - hw = __clk_hw_register_fixed_rate_with_accuracy(NULL, NULL, pll_clk, + hw = __clk_hw_register_fixed_rate(NULL, NULL, pll_clk, ref_clk, NULL, NULL, 0, rate, 0, CLK_FIXED_RATE_PARENT_ACCURACY); diff --git a/drivers/clk/mmp/clk-pll.c b/drivers/clk/mmp/clk-pll.c index 7077be293871..962014cfdc44 100644 --- a/drivers/clk/mmp/clk-pll.c +++ b/drivers/clk/mmp/clk-pll.c @@ -97,7 +97,7 @@ static const struct clk_ops mmp_clk_pll_ops = { .recalc_rate = mmp_clk_pll_recalc_rate, }; -struct clk *mmp_clk_register_pll(char *name, +static struct clk *mmp_clk_register_pll(char *name, unsigned long default_rate, void __iomem *enable_reg, u32 enable, void __iomem *reg, u8 shift, @@ -137,3 +137,34 @@ struct clk *mmp_clk_register_pll(char *name, return clk; } + +void mmp_register_pll_clks(struct mmp_clk_unit *unit, + struct mmp_param_pll_clk *clks, + void __iomem *base, int size) +{ + struct clk *clk; + int i; + + for (i = 0; i < size; i++) { + void __iomem *reg = NULL; + + if (clks[i].offset) + reg = base + clks[i].offset; + + clk = mmp_clk_register_pll(clks[i].name, + clks[i].default_rate, + base + clks[i].enable_offset, + clks[i].enable, + reg, clks[i].shift, + clks[i].input_rate, + base + clks[i].postdiv_offset, + clks[i].postdiv_shift); + if (IS_ERR(clk)) { + pr_err("%s: failed to register clock %s\n", + __func__, clks[i].name); + continue; + } + if (clks[i].id) + unit->clk_table[clks[i].id] = clk; + } +} diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c index 317123641d1e..ca7d37e2c7be 100644 --- a/drivers/clk/mmp/clk.c +++ b/drivers/clk/mmp/clk.c @@ -176,37 +176,6 @@ void mmp_register_div_clks(struct mmp_clk_unit *unit, } } -void mmp_register_pll_clks(struct mmp_clk_unit *unit, - struct mmp_param_pll_clk *clks, - void __iomem *base, int size) -{ - struct clk *clk; - int i; - - for (i = 0; i < size; i++) { - void __iomem *reg = NULL; - - if (clks[i].offset) - reg = base + clks[i].offset; - - clk = mmp_clk_register_pll(clks[i].name, - clks[i].default_rate, - base + clks[i].enable_offset, - clks[i].enable, - reg, clks[i].shift, - clks[i].input_rate, - base + clks[i].postdiv_offset, - clks[i].postdiv_shift); - if (IS_ERR(clk)) { - pr_err("%s: failed to register clock %s\n", - __func__, clks[i].name); - continue; - } - if (clks[i].id) - unit->clk_table[clks[i].id] = clk; - } -} - void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id, struct clk *clk) { diff --git a/drivers/clk/mmp/clk.h b/drivers/clk/mmp/clk.h index 971b4d6d992f..20dc1e5dd756 100644 --- a/drivers/clk/mmp/clk.h +++ b/drivers/clk/mmp/clk.h @@ -238,13 +238,6 @@ void mmp_register_pll_clks(struct mmp_clk_unit *unit, struct mmp_param_pll_clk *clks, void __iomem *base, int size); -extern struct clk *mmp_clk_register_pll(char *name, - unsigned long default_rate, - void __iomem *enable_reg, u32 enable, - void __iomem *reg, u8 shift, - unsigned long input_rate, - void __iomem *postdiv_reg, u8 postdiv_shift); - #define DEFINE_MIX_REG_INFO(w_d, s_d, w_m, s_m, fc) \ { \ .width_div = (w_d), \ diff --git a/drivers/clk/sprd/sc9863a-clk.c b/drivers/clk/sprd/sc9863a-clk.c index a0631f7756cf..2e2dfb2d48ff 100644 --- a/drivers/clk/sprd/sc9863a-clk.c +++ b/drivers/clk/sprd/sc9863a-clk.c @@ -1641,8 +1641,9 @@ static SPRD_SC_GATE_CLK_FW_NAME(i2c4_eb, "i2c4-eb", "ext-26m", 0x0, 0x1000, BIT(12), 0, 0); static SPRD_SC_GATE_CLK_FW_NAME(uart0_eb, "uart0-eb", "ext-26m", 0x0, 0x1000, BIT(13), 0, 0); +/* uart1_eb is for console, don't gate even if unused */ static SPRD_SC_GATE_CLK_FW_NAME(uart1_eb, "uart1-eb", "ext-26m", 0x0, - 0x1000, BIT(14), 0, 0); + 0x1000, BIT(14), CLK_IGNORE_UNUSED, 0); static SPRD_SC_GATE_CLK_FW_NAME(uart2_eb, "uart2-eb", "ext-26m", 0x0, 0x1000, BIT(15), 0, 0); static SPRD_SC_GATE_CLK_FW_NAME(uart3_eb, "uart3-eb", "ext-26m", 0x0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 559dc24ef436..f84f9e35a73b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2008,8 +2008,24 @@ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) */ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) { - return !!memcmp(adev->gart.ptr, adev->reset_magic, - AMDGPU_RESET_MAGIC_NUM); + if (memcmp(adev->gart.ptr, adev->reset_magic, + AMDGPU_RESET_MAGIC_NUM)) + return true; + + if (!adev->in_gpu_reset) + return false; + + /* + * For all ASICs with baco/mode1 reset, the VRAM is + * always assumed to be lost. + */ + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_BACO: + case AMD_RESET_METHOD_MODE1: + return true; + default: + return false; + } } /** @@ -2340,6 +2356,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 006f21ef7ddf..62635e58e45e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1358,8 +1358,6 @@ static int cik_asic_reset(struct amdgpu_device *adev) int r; if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d78059fd2c72..f92c158d89a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -279,7 +279,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ - (SH_MEM_ALIGNMENT_MODE_DWORD << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e6b113ed2f40..0c390485bc10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1234,6 +1234,8 @@ struct amdgpu_gfxoff_quirk { static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ + { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 033cbbca2072..52318b03c424 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -351,8 +351,6 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); ret = smu_baco_enter(smu); if (ret) return ret; @@ -360,8 +358,6 @@ static int nv_asic_reset(struct amdgpu_device *adev) if (ret) return ret; } else { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a40499d51c93..d42a8d8a0dea 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -569,14 +569,10 @@ static int soc15_asic_reset(struct amdgpu_device *adev) switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return amdgpu_dpm_mode2_reset(adev); default: - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78b35901643b..3ce10e05d0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -765,8 +765,6 @@ static int vi_asic_reset(struct amdgpu_device *adev) int r; if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - if (!adev->in_suspend) - amdgpu_inc_vram_lost(adev); r = amdgpu_dpm_baco_reset(adev); } else { r = vi_asic_pci_config_reset(adev); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 7740488999df..4795eb66b2b2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3804,9 +3804,12 @@ static int smu7_trim_single_dpm_states(struct pp_hwmgr *hwmgr, { uint32_t i; + /* force the trim if mclk_switching is disabled to prevent flicker */ + bool force_trim = (low_limit == high_limit); for (i = 0; i < dpm_table->count; i++) { /*skip the trim if od is enabled*/ - if (!hwmgr->od_enabled && (dpm_table->dpm_levels[i].value < low_limit + if ((!hwmgr->od_enabled || force_trim) + && (dpm_table->dpm_levels[i].value < low_limit || dpm_table->dpm_levels[i].value > high_limit)) dpm_table->dpm_levels[i].enabled = false; else diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 541c932a6005..655ba4fb05dc 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1718,6 +1718,12 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) if (ret) goto out; + if (ras && ras->supported) { + ret = smu_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + if (ret) + goto out; + } + /* clear vbios scratch 6 and 7 for coming asic reinit */ WREG32(adev->bios_scratch_reg_offset + 6, 0); WREG32(adev->bios_scratch_reg_offset + 7, 0); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 074c4efb58eb..eee530453aa6 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -131,6 +131,7 @@ struct kvmgt_vdev { struct work_struct release_work; atomic_t released; struct vfio_device *vfio_device; + struct vfio_group *vfio_group; }; static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu) @@ -151,6 +152,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { struct drm_i915_private *i915 = vgpu->gvt->gt->i915; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); int total_pages; int npage; int ret; @@ -160,7 +162,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, for (npage = 0; npage < total_pages; npage++) { unsigned long cur_gfn = gfn + npage; - ret = vfio_unpin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1); + ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1); drm_WARN_ON(&i915->drm, ret != 1); } } @@ -169,6 +171,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long base_pfn = 0; int total_pages; int npage; @@ -183,8 +186,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long cur_gfn = gfn + npage; unsigned long pfn; - ret = vfio_pin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); if (ret != 1) { gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", cur_gfn, ret); @@ -792,6 +795,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long events; int ret; + struct vfio_group *vfio_group; vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; vdev->group_notifier.notifier_call = intel_vgpu_group_notifier; @@ -814,6 +818,14 @@ static int intel_vgpu_open(struct mdev_device *mdev) goto undo_iommu; } + vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev)); + if (IS_ERR_OR_NULL(vfio_group)) { + ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group); + gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n"); + goto undo_register; + } + vdev->vfio_group = vfio_group; + /* Take a module reference as mdev core doesn't take * a reference for vendor driver. */ @@ -830,6 +842,10 @@ static int intel_vgpu_open(struct mdev_device *mdev) return ret; undo_group: + vfio_group_put_external_user(vdev->vfio_group); + vdev->vfio_group = NULL; + +undo_register: vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &vdev->group_notifier); @@ -884,6 +900,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) kvmgt_guest_exit(info); intel_vgpu_release_msi_eventfd_ctx(vgpu); + vfio_group_put_external_user(vdev->vfio_group); vdev->kvm = NULL; vgpu->handle = 0; @@ -2035,33 +2052,14 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, void *buf, unsigned long len, bool write) { struct kvmgt_guest_info *info; - struct kvm *kvm; - int idx, ret; - bool kthread = current->mm == NULL; if (!handle_valid(handle)) return -ESRCH; info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; - - if (kthread) { - if (!mmget_not_zero(kvm->mm)) - return -EFAULT; - use_mm(kvm->mm); - } - - idx = srcu_read_lock(&kvm->srcu); - ret = write ? kvm_write_guest(kvm, gpa, buf, len) : - kvm_read_guest(kvm, gpa, buf, len); - srcu_read_unlock(&kvm->srcu, idx); - - if (kthread) { - unuse_mm(kvm->mm); - mmput(kvm->mm); - } - return ret; + return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group, + gpa, buf, len, write); } static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 551be589d6f4..66a46e41d5ef 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2941,49 +2941,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce, } /** - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation - * @stream: An i915 perf stream - * @file: An i915 perf stream file - * @buf: destination buffer given by userspace - * @count: the number of bytes userspace wants to read - * @ppos: (inout) file seek position (unused) - * - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to - * ensure that if we've successfully copied any data then reporting that takes - * precedence over any internal error status, so the data isn't lost. - * - * For example ret will be -ENOSPC whenever there is more buffered data than - * can be copied to userspace, but that's only interesting if we weren't able - * to copy some data because it implies the userspace buffer is too small to - * receive a single record (and we never split records). - * - * Another case with ret == -EFAULT is more of a grey area since it would seem - * like bad form for userspace to ask us to overrun its buffer, but the user - * knows best: - * - * http://yarchive.net/comp/linux/partial_reads_writes.html - * - * Returns: The number of bytes copied or a negative error code on failure. - */ -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, - struct file *file, - char __user *buf, - size_t count, - loff_t *ppos) -{ - /* Note we keep the offset (aka bytes read) separate from any - * error status so that the final check for whether we return - * the bytes read with a higher precedence than any error (see - * comment below) doesn't need to be handled/duplicated in - * stream->ops->read() implementations. - */ - size_t offset = 0; - int ret = stream->ops->read(stream, buf, count, &offset); - - return offset ?: (ret ?: -EAGAIN); -} - -/** * i915_perf_read - handles read() FOP for i915 perf stream FDs * @file: An i915 perf stream file * @buf: destination buffer given by userspace @@ -3008,7 +2965,8 @@ static ssize_t i915_perf_read(struct file *file, { struct i915_perf_stream *stream = file->private_data; struct i915_perf *perf = stream->perf; - ssize_t ret; + size_t offset = 0; + int ret; /* To ensure it's handled consistently we simply treat all reads of a * disabled stream as an error. In particular it might otherwise lead @@ -3031,13 +2989,12 @@ static ssize_t i915_perf_read(struct file *file, return ret; mutex_lock(&perf->lock); - ret = i915_perf_read_locked(stream, file, - buf, count, ppos); + ret = stream->ops->read(stream, buf, count, &offset); mutex_unlock(&perf->lock); - } while (ret == -EAGAIN); + } while (!offset && !ret); } else { mutex_lock(&perf->lock); - ret = i915_perf_read_locked(stream, file, buf, count, ppos); + ret = stream->ops->read(stream, buf, count, &offset); mutex_unlock(&perf->lock); } @@ -3048,15 +3005,15 @@ static ssize_t i915_perf_read(struct file *file, * and read() returning -EAGAIN. Clearing the oa.pollin state here * effectively ensures we back off until the next hrtimer callback * before reporting another EPOLLIN event. + * The exception to this is if ops->read() returned -ENOSPC which means + * that more OA data is available than could fit in the user provided + * buffer. In this case we want the next poll() call to not block. */ - if (ret >= 0 || ret == -EAGAIN) { - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ + if (ret != -ENOSPC) stream->pollin = false; - } - return ret; + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */ + return offset ?: (ret ?: -EAGAIN); } static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c index 232a9d7c51e5..e770c9497871 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c @@ -25,6 +25,9 @@ MODULE_FIRMWARE("nvidia/gp108/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/sig.bin"); static const struct nvkm_sec2_fwif gp108_sec2_fwif[] = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c index b6ebd95c9ba1..a8295653ceab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c @@ -56,6 +56,22 @@ tu102_sec2_nofw(struct nvkm_sec2 *sec2, int ver, return 0; } +MODULE_FIRMWARE("nvidia/tu102/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu102/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu102/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/sig.bin"); + static const struct nvkm_sec2_fwif tu102_sec2_fwif[] = { { 0, gp102_sec2_load, &tu102_sec2, &gp102_sec2_acr_1 }, diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 05a30832c6ba..4c62f900bf7e 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -412,7 +412,7 @@ config SENSORS_DRIVETEMP hard disk drives. This driver can also be built as a module. If so, the module - will be called satatemp. + will be called drivetemp. config SENSORS_DS620 tristate "Dallas Semiconductor DS620" diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c index 370d0c74eb01..9179460c2d9d 100644 --- a/drivers/hwmon/drivetemp.c +++ b/drivers/hwmon/drivetemp.c @@ -264,12 +264,18 @@ static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val) return err; switch (attr) { case hwmon_temp_input: + if (!temp_is_valid(buf[SCT_STATUS_TEMP])) + return -ENODATA; *val = temp_from_sct(buf[SCT_STATUS_TEMP]); break; case hwmon_temp_lowest: + if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST])) + return -ENODATA; *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]); break; case hwmon_temp_highest: + if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST])) + return -ENODATA; *val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]); break; default: diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index f2d81b0558e5..e3f1ebee7130 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -506,7 +506,7 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) } data->config = config; - hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, + hwmon_dev = devm_hwmon_device_register_with_info(dev, "jc42", data, &jc42_chip_info, NULL); return PTR_ERR_OR_ZERO(hwmon_dev); diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 3f37d5d81fe4..9915578533bb 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -186,7 +186,7 @@ static long get_raw_temp(struct k10temp_data *data) return temp; } -const char *k10temp_temp_label[] = { +static const char *k10temp_temp_label[] = { "Tctl", "Tdie", "Tccd1", @@ -199,12 +199,12 @@ const char *k10temp_temp_label[] = { "Tccd8", }; -const char *k10temp_in_label[] = { +static const char *k10temp_in_label[] = { "Vcore", "Vsoc", }; -const char *k10temp_curr_label[] = { +static const char *k10temp_curr_label[] = { "Icore", "Isoc", }; diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 4d2315208bb5..0c622711ef7e 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -21,8 +21,50 @@ #define ISL68137_VOUT_AVS 0x30 #define RAA_DMPVR2_READ_VMON 0xc8 -enum versions { +enum chips { isl68137, + isl68220, + isl68221, + isl68222, + isl68223, + isl68224, + isl68225, + isl68226, + isl68227, + isl68229, + isl68233, + isl68239, + isl69222, + isl69223, + isl69224, + isl69225, + isl69227, + isl69228, + isl69234, + isl69236, + isl69239, + isl69242, + isl69243, + isl69247, + isl69248, + isl69254, + isl69255, + isl69256, + isl69259, + isl69260, + isl69268, + isl69269, + isl69298, + raa228000, + raa228004, + raa228006, + raa228228, + raa229001, + raa229004, +}; + +enum variants { + raa_dmpvr1_2rail, raa_dmpvr2_1rail, raa_dmpvr2_2rail, raa_dmpvr2_3rail, @@ -186,7 +228,7 @@ static int isl68137_probe(struct i2c_client *client, memcpy(info, &raa_dmpvr_info, sizeof(*info)); switch (id->driver_data) { - case isl68137: + case raa_dmpvr1_2rail: info->pages = 2; info->R[PSC_VOLTAGE_IN] = 3; info->func[0] &= ~PMBUS_HAVE_VMON; @@ -224,11 +266,47 @@ static int isl68137_probe(struct i2c_client *client, } static const struct i2c_device_id raa_dmpvr_id[] = { - {"isl68137", isl68137}, - {"raa_dmpvr2_1rail", raa_dmpvr2_1rail}, - {"raa_dmpvr2_2rail", raa_dmpvr2_2rail}, - {"raa_dmpvr2_3rail", raa_dmpvr2_3rail}, - {"raa_dmpvr2_hv", raa_dmpvr2_hv}, + {"isl68137", raa_dmpvr1_2rail}, + {"isl68220", raa_dmpvr2_2rail}, + {"isl68221", raa_dmpvr2_3rail}, + {"isl68222", raa_dmpvr2_2rail}, + {"isl68223", raa_dmpvr2_2rail}, + {"isl68224", raa_dmpvr2_3rail}, + {"isl68225", raa_dmpvr2_2rail}, + {"isl68226", raa_dmpvr2_3rail}, + {"isl68227", raa_dmpvr2_1rail}, + {"isl68229", raa_dmpvr2_3rail}, + {"isl68233", raa_dmpvr2_2rail}, + {"isl68239", raa_dmpvr2_3rail}, + + {"isl69222", raa_dmpvr2_2rail}, + {"isl69223", raa_dmpvr2_3rail}, + {"isl69224", raa_dmpvr2_2rail}, + {"isl69225", raa_dmpvr2_2rail}, + {"isl69227", raa_dmpvr2_3rail}, + {"isl69228", raa_dmpvr2_3rail}, + {"isl69234", raa_dmpvr2_2rail}, + {"isl69236", raa_dmpvr2_2rail}, + {"isl69239", raa_dmpvr2_3rail}, + {"isl69242", raa_dmpvr2_2rail}, + {"isl69243", raa_dmpvr2_1rail}, + {"isl69247", raa_dmpvr2_2rail}, + {"isl69248", raa_dmpvr2_2rail}, + {"isl69254", raa_dmpvr2_2rail}, + {"isl69255", raa_dmpvr2_2rail}, + {"isl69256", raa_dmpvr2_2rail}, + {"isl69259", raa_dmpvr2_2rail}, + {"isl69260", raa_dmpvr2_2rail}, + {"isl69268", raa_dmpvr2_2rail}, + {"isl69269", raa_dmpvr2_3rail}, + {"isl69298", raa_dmpvr2_2rail}, + + {"raa228000", raa_dmpvr2_hv}, + {"raa228004", raa_dmpvr2_hv}, + {"raa228006", raa_dmpvr2_hv}, + {"raa228228", raa_dmpvr2_2rail}, + {"raa229001", raa_dmpvr2_2rail}, + {"raa229004", raa_dmpvr2_2rail}, {} }; diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index 20ef63820c77..f5c00f903df3 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -384,7 +384,6 @@ static int altr_i2c_probe(struct platform_device *pdev) struct altr_i2c_dev *idev = NULL; struct resource *res; int irq, ret; - u32 val; idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL); if (!idev) @@ -411,17 +410,17 @@ static int altr_i2c_probe(struct platform_device *pdev) init_completion(&idev->msg_complete); spin_lock_init(&idev->lock); - val = device_property_read_u32(idev->dev, "fifo-size", + ret = device_property_read_u32(idev->dev, "fifo-size", &idev->fifo_size); - if (val) { + if (ret) { dev_err(&pdev->dev, "FIFO size set to default of %d\n", ALTR_I2C_DFLT_FIFO_SZ); idev->fifo_size = ALTR_I2C_DFLT_FIFO_SZ; } - val = device_property_read_u32(idev->dev, "clock-frequency", + ret = device_property_read_u32(idev->dev, "clock-frequency", &idev->bus_clk_rate); - if (val) { + if (ret) { dev_err(&pdev->dev, "Default to 100kHz\n"); idev->bus_clk_rate = I2C_MAX_STANDARD_MODE_FREQ; /* default clock rate */ } diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index c98befe2a92e..5536673060cc 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -354,10 +354,16 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) adap->dev.of_node = pdev->dev.of_node; adap->nr = -1; - dev_pm_set_driver_flags(&pdev->dev, - DPM_FLAG_SMART_PREPARE | - DPM_FLAG_SMART_SUSPEND | - DPM_FLAG_LEAVE_SUSPENDED); + if (dev->flags & ACCESS_NO_IRQ_SUSPEND) { + dev_pm_set_driver_flags(&pdev->dev, + DPM_FLAG_SMART_PREPARE | + DPM_FLAG_LEAVE_SUSPENDED); + } else { + dev_pm_set_driver_flags(&pdev->dev, + DPM_FLAG_SMART_PREPARE | + DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_LEAVE_SUSPENDED); + } /* The code below assumes runtime PM to be disabled. */ WARN_ON(pm_runtime_enabled(&pdev->dev)); diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 4c4d17ddc96b..8280ac7cc1b7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -996,14 +996,13 @@ tegra_i2c_poll_completion_timeout(struct tegra_i2c_dev *i2c_dev, do { u32 status = i2c_readl(i2c_dev, I2C_INT_STATUS); - if (status) { + if (status) tegra_i2c_isr(i2c_dev->irq, i2c_dev); - if (completion_done(complete)) { - s64 delta = ktime_ms_delta(ktimeout, ktime); + if (completion_done(complete)) { + s64 delta = ktime_ms_delta(ktimeout, ktime); - return msecs_to_jiffies(delta) ?: 1; - } + return msecs_to_jiffies(delta) ?: 1; } ktime = ktime_get(); @@ -1030,14 +1029,18 @@ tegra_i2c_wait_completion_timeout(struct tegra_i2c_dev *i2c_dev, disable_irq(i2c_dev->irq); /* - * There is a chance that completion may happen after IRQ - * synchronization, which is done by disable_irq(). + * Under some rare circumstances (like running KASAN + + * NFS root) CPU, which handles interrupt, may stuck in + * uninterruptible state for a significant time. In this + * case we will get timeout if I2C transfer is running on + * a sibling CPU, despite of IRQ being raised. + * + * In order to handle this rare condition, the IRQ status + * needs to be checked after timeout. */ - if (ret == 0 && completion_done(complete)) { - dev_warn(i2c_dev->dev, - "completion done after timeout\n"); - ret = 1; - } + if (ret == 0) + ret = tegra_i2c_poll_completion_timeout(i2c_dev, + complete, 0); } return ret; @@ -1216,6 +1219,15 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, time_left = tegra_i2c_wait_completion_timeout( i2c_dev, &i2c_dev->dma_complete, xfer_time); + /* + * Synchronize DMA first, since dmaengine_terminate_sync() + * performs synchronization after the transfer's termination + * and we want to get a completion if transfer succeeded. + */ + dmaengine_synchronize(i2c_dev->msg_read ? + i2c_dev->rx_dma_chan : + i2c_dev->tx_dma_chan); + dmaengine_terminate_sync(i2c_dev->msg_read ? i2c_dev->rx_dma_chan : i2c_dev->tx_dma_chan); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5cc0b0ec5570..a66912782064 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2273,19 +2273,6 @@ i2c_new_scanned_device(struct i2c_adapter *adap, } EXPORT_SYMBOL_GPL(i2c_new_scanned_device); -struct i2c_client * -i2c_new_probed_device(struct i2c_adapter *adap, - struct i2c_board_info *info, - unsigned short const *addr_list, - int (*probe)(struct i2c_adapter *adap, unsigned short addr)) -{ - struct i2c_client *client; - - client = i2c_new_scanned_device(adap, info, addr_list, probe); - return IS_ERR(client) ? NULL : client; -} -EXPORT_SYMBOL_GPL(i2c_new_probed_device); - struct i2c_adapter *i2c_get_adapter(int nr) { struct i2c_adapter *adapter; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index dcf8b51b47fd..7f17f8303988 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1034,8 +1034,8 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, return 0; } -static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag, - int format, char *buf, int buflen) +static int ide_cdrom_read_tocentry(ide_drive_t *drive, int trackno, + int msf_flag, int format, char *buf, int buflen) { unsigned char cmd[BLK_MAX_CDB]; @@ -1104,7 +1104,7 @@ int ide_cd_read_toc(ide_drive_t *drive) sectors_per_frame << SECTOR_SHIFT); /* first read just the header, so we know how long the TOC is */ - stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, + stat = ide_cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, sizeof(struct atapi_toc_header)); if (stat) return stat; @@ -1121,7 +1121,7 @@ int ide_cd_read_toc(ide_drive_t *drive) ntracks = MAX_TRACKS; /* now read the whole schmeer */ - stat = cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0, + stat = ide_cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0, (char *)&toc->hdr, sizeof(struct atapi_toc_header) + (ntracks + 1) * @@ -1141,7 +1141,7 @@ int ide_cd_read_toc(ide_drive_t *drive) * Heiko Eißfeldt. */ ntracks = 0; - stat = cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0, + stat = ide_cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0, (char *)&toc->hdr, sizeof(struct atapi_toc_header) + (ntracks + 1) * @@ -1181,7 +1181,7 @@ int ide_cd_read_toc(ide_drive_t *drive) if (toc->hdr.first_track != CDROM_LEADOUT) { /* read the multisession information */ - stat = cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp, + stat = ide_cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp, sizeof(ms_tmp)); if (stat) return stat; @@ -1195,7 +1195,7 @@ int ide_cd_read_toc(ide_drive_t *drive) if (drive->atapi_flags & IDE_AFLAG_TOCADDR_AS_BCD) { /* re-read multisession information using MSF format */ - stat = cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp, + stat = ide_cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp, sizeof(ms_tmp)); if (stat) return stat; @@ -1305,8 +1305,7 @@ static int ide_cdrom_register(ide_drive_t *drive, int nslots) if (drive->atapi_flags & IDE_AFLAG_NO_SPEED_SELECT) devinfo->mask |= CDC_SELECT_SPEED; - devinfo->disk = info->disk; - return register_cdrom(devinfo); + return register_cdrom(info->disk, devinfo); } static int ide_cdrom_probe_capabilities(ide_drive_t *drive) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index b137f27a34d5..c31f1d2b3b07 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -233,10 +233,13 @@ static ide_startstop_t do_special(ide_drive_t *drive) void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; - struct scatterlist *sg = hwif->sg_table; + struct scatterlist *sg = hwif->sg_table, *last_sg = NULL; struct request *rq = cmd->rq; - cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); + cmd->sg_nents = __blk_rq_map_sg(drive->queue, rq, sg, &last_sg); + if (blk_rq_bytes(rq) && (blk_rq_bytes(rq) & rq->q->dma_pad_mask)) + last_sg->length += + (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; } EXPORT_SYMBOL_GPL(ide_map_sg); diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index eb9bce93cd05..fd7c537fb42a 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -416,7 +416,7 @@ static const struct irq_domain_ops bcm7038_l1_domain_ops = { .map = bcm7038_l1_map, }; -int __init bcm7038_l1_of_init(struct device_node *dn, +static int __init bcm7038_l1_of_init(struct device_node *dn, struct device_node *parent) { struct bcm7038_l1_chip *intc; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 54d142ccc63a..124251b0ccba 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -14,6 +14,7 @@ #include <linux/dma-iommu.h> #include <linux/efi.h> #include <linux/interrupt.h> +#include <linux/iopoll.h> #include <linux/irqdomain.h> #include <linux/list.h> #include <linux/log2.h> @@ -3672,6 +3673,20 @@ out: return IRQ_SET_MASK_OK_DONE; } +static void its_wait_vpt_parse_complete(void) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + if (!gic_rdists->has_vpend_valid_dirty) + return; + + WARN_ON_ONCE(readq_relaxed_poll_timeout(vlpi_base + GICR_VPENDBASER, + val, + !(val & GICR_VPENDBASER_Dirty), + 10, 500)); +} + static void its_vpe_schedule(struct its_vpe *vpe) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3702,6 +3717,8 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + its_wait_vpt_parse_complete(); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3910,6 +3927,8 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + its_wait_vpt_parse_complete(); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, @@ -4035,6 +4054,7 @@ static int its_sgi_set_affinity(struct irq_data *d, * not on the host (since they can only be targetting a vPE). * Tell the kernel we've done whatever it asked for. */ + irq_data_update_effective_affinity(d, mask_val); return IRQ_SET_MASK_OK; } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 9dbc81b6f62e..d7006ef18a0d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -873,6 +873,7 @@ static int __gic_update_rdist_properties(struct redist_region *region, gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID); gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) | gic_data.rdists.has_rvpeid); + gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY); /* Detect non-sensical configurations */ if (WARN_ON_ONCE(gic_data.rdists.has_rvpeid && !gic_data.rdists.has_vlpis)) { @@ -893,10 +894,11 @@ static void gic_update_rdist_properties(void) if (WARN_ON(gic_data.ppi_nr == UINT_MAX)) gic_data.ppi_nr = 0; pr_info("%d PPIs implemented\n", gic_data.ppi_nr); - pr_info("%sVLPI support, %sdirect LPI support, %sRVPEID support\n", - !gic_data.rdists.has_vlpis ? "no " : "", - !gic_data.rdists.has_direct_lpi ? "no " : "", - !gic_data.rdists.has_rvpeid ? "no " : ""); + if (gic_data.rdists.has_vlpis) + pr_info("GICv4 features: %s%s%s\n", + gic_data.rdists.has_direct_lpi ? "DirectLPI " : "", + gic_data.rdists.has_rvpeid ? "RVPEID " : "", + gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : ""); } /* Check whether it's single security state view */ @@ -1620,6 +1622,7 @@ static int __init gic_init_bases(void __iomem *dist_base, gic_data.rdists.has_rvpeid = true; gic_data.rdists.has_vlpis = true; gic_data.rdists.has_direct_lpi = true; + gic_data.rdists.has_vpend_valid_dirty = true; if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { err = -ENOMEM; diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 6b566bba263b..ff7627b57772 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -220,10 +220,16 @@ static int mbigen_irq_domain_alloc(struct irq_domain *domain, return 0; } +static void mbigen_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + platform_msi_domain_free(domain, virq, nr_irqs); +} + static const struct irq_domain_ops mbigen_domain_ops = { .translate = mbigen_domain_translate, .alloc = mbigen_irq_domain_alloc, - .free = irq_domain_free_irqs_common, + .free = mbigen_irq_domain_free, }; static int mbigen_of_create_domain(struct platform_device *pdev, diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index ccc7f823911b..bc7aebcc96e9 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -144,12 +144,17 @@ struct meson_gpio_irq_controller { static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, unsigned int reg, u32 mask, u32 val) { + unsigned long flags; u32 tmp; + spin_lock_irqsave(&ctl->lock, flags); + tmp = readl_relaxed(ctl->base + reg); tmp &= ~mask; tmp |= val; writel_relaxed(tmp, ctl->base + reg); + + spin_unlock_irqrestore(&ctl->lock, flags); } static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) @@ -196,14 +201,15 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, unsigned long hwirq, u32 **channel_hwirq) { + unsigned long flags; unsigned int idx; - spin_lock(&ctl->lock); + spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ idx = find_first_zero_bit(ctl->channel_map, NUM_CHANNEL); if (idx >= NUM_CHANNEL) { - spin_unlock(&ctl->lock); + spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; } @@ -211,6 +217,8 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* Mark the channel as used */ set_bit(idx, ctl->channel_map); + spin_unlock_irqrestore(&ctl->lock, flags); + /* * Setup the mux of the channel to route the signal of the pad * to the appropriate input of the GIC @@ -225,8 +233,6 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, */ *channel_hwirq = &(ctl->channel_irqs[idx]); - spin_unlock(&ctl->lock); - pr_debug("hwirq %lu assigned to channel %d - irq %u\n", hwirq, idx, **channel_hwirq); @@ -287,13 +293,9 @@ static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl, val |= REG_EDGE_POL_LOW(params, idx); } - spin_lock(&ctl->lock); - meson_gpio_irq_update_bits(ctl, REG_EDGE_POL, REG_EDGE_POL_MASK(params, idx), val); - spin_unlock(&ctl->lock); - return 0; } diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index 547045d89c4b..91adf771f185 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -66,7 +66,7 @@ struct mvebu_icu_irq_data { unsigned int type; }; -DEFINE_STATIC_KEY_FALSE(legacy_bindings); +static DEFINE_STATIC_KEY_FALSE(legacy_bindings); static void mvebu_icu_init(struct mvebu_icu *icu, struct mvebu_icu_msi_data *msi_data, diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index c34fb3ae0ff8..d0a71febdadc 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -56,7 +56,7 @@ #define CONTEXT_THRESHOLD 0x00 #define CONTEXT_CLAIM 0x04 -#define PLIC_DISABLE_THRESHOLD 0xf +#define PLIC_DISABLE_THRESHOLD 0x7 #define PLIC_ENABLE_THRESHOLD 0 struct plic_priv { diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 8f6e6b08eadf..7e3ebf6ed2cd 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -37,6 +37,7 @@ #define VINT_ENABLE_SET_OFFSET 0x0 #define VINT_ENABLE_CLR_OFFSET 0x8 #define VINT_STATUS_OFFSET 0x18 +#define VINT_STATUS_MASKED_OFFSET 0x20 /** * struct ti_sci_inta_event_desc - Description of an event coming to @@ -116,7 +117,7 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc) chained_irq_enter(irq_desc_get_chip(desc), desc); val = readq_relaxed(inta->base + vint_desc->vint_id * 0x1000 + - VINT_STATUS_OFFSET); + VINT_STATUS_MASKED_OFFSET); for_each_set_bit(bit, &val, MAX_EVENTS_PER_VINT) { virq = irq_find_mapping(domain, vint_desc->events[bit].hwirq); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 71a90fbec314..77d1a2697517 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1372,7 +1372,6 @@ void bch_flash_dev_request_init(struct bcache_device *d) { struct gendisk *g = d->disk; - g->queue->make_request_fn = flash_dev_make_request; g->queue->backing_dev_info->congested_fn = flash_dev_congested; d->cache_miss = flash_dev_cache_miss; d->ioctl = flash_dev_ioctl; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0a2cc197f62b..8277b959e00b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -279,7 +279,6 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q; struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; sector_t dev_size = @@ -288,22 +287,6 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, limits->logical_block_size >> SECTOR_SHIFT; char b[BDEVNAME_SIZE]; - /* - * Some devices exist without request functions, - * such as loop devices not yet bound to backing files. - * Forbid the use of such devices. - */ - q = bdev_get_queue(bdev); - if (!q || !q->make_request_fn) { - DMWARN("%s: %s is not yet initialised: " - "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), - (unsigned long long)start, - (unsigned long long)len, - (unsigned long long)dev_size); - return 1; - } - if (!dev_size) return 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index db9e46114653..0eb93da44ea2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1788,6 +1788,9 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) int srcu_idx; struct dm_table *map; + if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) + return blk_mq_make_request(q, bio); + map = dm_get_live_table(md, &srcu_idx); /* if we're suspended, we have to queue this io for later */ diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 7d079154f849..af5b0ecb8f89 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -143,9 +143,6 @@ int dasd_scan_partitions(struct dasd_block *block) */ void dasd_destroy_partitions(struct dasd_block *block) { - /* The two structs have 168/176 byte on 31/64 bit. */ - struct blkpg_partition bpart; - struct blkpg_ioctl_arg barg; struct block_device *bdev; /* @@ -155,19 +152,10 @@ void dasd_destroy_partitions(struct dasd_block *block) bdev = block->bdev; block->bdev = NULL; - /* - * See fs/partition/check.c:delete_partition - * Can't call delete_partitions directly. Use ioctl. - * The ioctl also does locking and invalidation. - */ - memset(&bpart, 0, sizeof(struct blkpg_partition)); - memset(&barg, 0, sizeof(struct blkpg_ioctl_arg)); - barg.data = (void __force __user *) &bpart; - barg.op = BLKPG_DEL_PARTITION; - for (bpart.pno = block->gdp->minors - 1; bpart.pno > 0; bpart.pno--) - ioctl_by_bdev(bdev, BLKPG, (unsigned long) &barg); - - invalidate_partition(block->gdp, 0); + mutex_lock(&bdev->bd_mutex); + blk_drop_partitions(bdev); + mutex_unlock(&bdev->bd_mutex); + /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */ blkdev_put(bdev, FMODE_READ); set_capacity(block->gdp, 0); diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig index 90a17452a50d..13ed9073fc72 100644 --- a/drivers/scsi/hisi_sas/Kconfig +++ b/drivers/scsi/hisi_sas/Kconfig @@ -6,6 +6,7 @@ config SCSI_HISI_SAS select SCSI_SAS_LIBSAS select BLK_DEV_INTEGRITY depends on ATA + select SATA_HOST help This driver supports HiSilicon's SAS HBA, including support based on platform device diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index f301a8048b2f..bf1e98f11990 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2539,7 +2539,6 @@ ql_dbg(uint level, scsi_qla_host_t *vha, uint id, const char *fmt, ...) { va_list va; struct va_format vaf; - char pbuf[64]; va_start(va, fmt); @@ -2547,6 +2546,8 @@ ql_dbg(uint level, scsi_qla_host_t *vha, uint id, const char *fmt, ...) vaf.va = &va; if (!ql_mask_match(level)) { + char pbuf[64]; + if (vha != NULL) { const struct pci_dev *pdev = vha->hw->pdev; /* <module-name> <msg-id>:<host> Message */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5b2deaa730bf..caa6b840e459 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3611,8 +3611,6 @@ qla24xx_detect_sfp(scsi_qla_host_t *vha) ha->lr_distance = LR_DISTANCE_5K; } - if (!vha->flags.init_done) - rc = QLA_SUCCESS; out: ql_dbg(ql_dbg_async, vha, 0x507b, "SFP detect: %s-Range SFP %s (nvr=%x ll=%x lr=%x lrd=%x).\n", diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 8d7a905f6247..8a78d395bbc8 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -87,7 +87,6 @@ qla24xx_process_abts(struct scsi_qla_host *vha, void *pkt) } /* terminate exchange */ - memset(rsp_els, 0, sizeof(*rsp_els)); rsp_els->entry_type = ELS_IOCB_TYPE; rsp_els->entry_count = 1; rsp_els->nport_handle = ~0; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 9fd83d1bffe0..4ed90437e8c4 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -4894,8 +4894,6 @@ qla25xx_set_els_cmds_supported(scsi_qla_host_t *vha) return QLA_MEMORY_ALLOC_FAILED; } - memset(els_cmd_map, 0, ELS_CMD_MAP_SIZE); - els_cmd_map[index] |= 1 << bit; mcp->mb[0] = MBC_SET_RNID_PARAMS; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 47835c4b4ee0..0a73230a8f16 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -978,28 +978,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_io_completion_action(cmd, result); } -static blk_status_t scsi_init_sgtable(struct request *req, - struct scsi_data_buffer *sdb) +static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev, + struct request *rq) { - int count; - - /* - * If sg table allocation fails, requeue request later. - */ - if (unlikely(sg_alloc_table_chained(&sdb->table, - blk_rq_nr_phys_segments(req), sdb->table.sgl, - SCSI_INLINE_SG_CNT))) - return BLK_STS_RESOURCE; - - /* - * Next, walk the list, and fill in the addresses and sizes of - * each segment. - */ - count = blk_rq_map_sg(req->q, req, sdb->table.sgl); - BUG_ON(count > sdb->table.nents); - sdb->table.nents = count; - sdb->length = blk_rq_payload_bytes(req); - return BLK_STS_OK; + return sdev->dma_drain_len && blk_rq_is_passthrough(rq) && + !op_is_write(req_op(rq)) && + sdev->host->hostt->dma_need_drain(rq); } /* @@ -1015,19 +999,62 @@ static blk_status_t scsi_init_sgtable(struct request *req, */ blk_status_t scsi_init_io(struct scsi_cmnd *cmd) { + struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; + unsigned short nr_segs = blk_rq_nr_phys_segments(rq); + struct scatterlist *last_sg = NULL; blk_status_t ret; + bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq); + int count; - if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) + if (WARN_ON_ONCE(!nr_segs)) return BLK_STS_IOERR; - ret = scsi_init_sgtable(rq, &cmd->sdb); - if (ret) - return ret; + /* + * Make sure there is space for the drain. The driver must adjust + * max_hw_segments to be prepared for this. + */ + if (need_drain) + nr_segs++; + + /* + * If sg table allocation fails, requeue request later. + */ + if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs, + cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT))) + return BLK_STS_RESOURCE; + + /* + * Next, walk the list, and fill in the addresses and sizes of + * each segment. + */ + count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg); + + if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) { + unsigned int pad_len = + (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; + + last_sg->length += pad_len; + cmd->extra_len += pad_len; + } + + if (need_drain) { + sg_unmark_end(last_sg); + last_sg = sg_next(last_sg); + sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len); + sg_mark_end(last_sg); + + cmd->extra_len += sdev->dma_drain_len; + count++; + } + + BUG_ON(count > cmd->sdb.table.nents); + cmd->sdb.table.nents = count; + cmd->sdb.length = blk_rq_payload_bytes(rq); if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; - int ivecs, count; + int ivecs; if (WARN_ON_ONCE(!prot_sdb)) { /* @@ -1610,12 +1637,7 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) struct request_queue *q = hctx->queue; struct scsi_device *sdev = q->queuedata; - if (scsi_dev_queue_ready(q, sdev)) - return true; - - if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev)) - blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); - return false; + return scsi_dev_queue_ready(q, sdev); } static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4e6af592f018..9c0ee192f0f9 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -793,8 +793,10 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if (hp->dxfer_len >= SZ_256M) + if (hp->dxfer_len >= SZ_256M) { + sg_remove_request(sfp, srp); return -EINVAL; + } k = sg_start_req(srp, cmnd); if (k) { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index d2fe3fa470f9..f9b589d60a46 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -794,9 +794,8 @@ static int sr_probe(struct device *dev) set_capacity(disk, cd->capacity); disk->private_data = &cd->driver; disk->queue = sdev->request_queue; - cd->cdi.disk = disk; - if (register_cdrom(&cd->cdi)) + if (register_cdrom(disk, &cd->cdi)) goto fail_put; /* diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 6b4b354c88aa..1e031d81e59e 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -63,7 +63,7 @@ static int fc_get_pr_transport_id( * encoded TransportID. */ ptr = &se_nacl->initiatorname[0]; - for (i = 0; i < 24; ) { + for (i = 0; i < 23; ) { if (!strncmp(&ptr[i], ":", 1)) { i++; continue; @@ -341,7 +341,8 @@ static char *iscsi_parse_pr_out_transport_id( *p = tolower(*p); p++; } - } + } else + *port_nexus_ptr = NULL; return &buf[4]; } diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0b9dfa6b17bc..f769bb1e3735 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2073,6 +2073,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) mb->cmd_tail = 0; mb->cmd_head = 0; tcmu_flush_dcache_range(mb, sizeof(*mb)); + clear_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); del_timer(&udev->cmd_timer); diff --git a/fs/block_dev.c b/fs/block_dev.c index 93672c3f1c78..7cbb7b79935e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -713,7 +713,6 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, blk_queue_exit(bdev->bd_queue); return result; } -EXPORT_SYMBOL_GPL(bdev_read_page); /** * bdev_write_page() - Start writing a page to a block device @@ -758,7 +757,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, blk_queue_exit(bdev->bd_queue); return result; } -EXPORT_SYMBOL_GPL(bdev_write_page); /* * pseudo-fs @@ -882,21 +880,6 @@ static int bdev_set(struct inode *inode, void *data) static LIST_HEAD(all_bdevs); -/* - * If there is a bdev inode for this device, unhash it so that it gets evicted - * as soon as last inode reference is dropped. - */ -void bdev_unhash_inode(dev_t dev) -{ - struct inode *inode; - - inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); - if (inode) { - remove_inode_hash(inode); - iput(inode); - } -} - struct block_device *bdget(dev_t dev) { struct block_device *bdev; @@ -1516,7 +1499,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); rescan: - ret = blk_drop_partitions(disk, bdev); + ret = blk_drop_partitions(bdev); if (ret) return ret; diff --git a/fs/buffer.c b/fs/buffer.c index 763de99508de..a60f60396cfa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1371,6 +1371,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__breadahead); +void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, + gfp_t gfp) +{ + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + if (likely(bh)) { + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + brelse(bh); + } +} +EXPORT_SYMBOL(__breadahead_gfp); + /** * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 140efc1a9374..182b864b3075 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -594,6 +594,8 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) cifs_max_pending); set_credits(server, server->maxReq); server->maxBuf = le16_to_cpu(rsp->MaxBufSize); + /* set up max_read for readpages check */ + server->max_read = server->maxBuf; /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { @@ -755,6 +757,8 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) set_credits(server, server->maxReq); /* probably no need to store and check maxvcs */ server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); + /* set up max_read for readpages check */ + server->max_read = server->maxBuf; server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf); server->capabilities = le32_to_cpu(pSMBr->Capabilities); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8fbbdcdad8ff..390d2b15ef6e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -61,7 +61,7 @@ static void cifs_set_ops(struct inode *inode) } /* check if server can support readpages */ - if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < + if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read < PAGE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 47d3e382ecaa..b30aa3cdd845 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1552,6 +1552,21 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) } rc = SMB2_sess_establish_session(sess_data); +#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS + if (ses->server->dialect < SMB30_PROT_ID) { + cifs_dbg(VFS, "%s: dumping generated SMB2 session keys\n", __func__); + /* + * The session id is opaque in terms of endianness, so we can't + * print it as a long long. we dump it as we got it on the wire + */ + cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid), + &ses->Suid); + cifs_dbg(VFS, "Session Key %*ph\n", + SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); + cifs_dbg(VFS, "Signing Key %*ph\n", + SMB3_SIGN_KEY_SIZE, ses->auth_key.response); + } +#endif out: kfree(ntlmssp_blob); SMB2_sess_free_buffer(sess_data); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 1a6c227ada8f..c0348e3b1695 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -660,8 +660,8 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; if (memcmp(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE)) { - dump_stack(); - cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", shdr->Command, shdr->MessageId); + cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", + shdr->Command, shdr->MessageId); return -EACCES; } else return 0; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0e0a4d6209c7..a32e5f7b5385 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -410,7 +410,7 @@ verified: * Read the bitmap for a given block_group,and validate the * bits for block/inode/inode tables are set in the bitmaps * - * Return buffer_head on success or NULL in case of failure. + * Return buffer_head on success or an ERR_PTR in case of failure. */ struct buffer_head * ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) @@ -502,7 +502,7 @@ out: return ERR_PTR(err); } -/* Returns 0 on success, 1 on error */ +/* Returns 0 on success, -errno on error */ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh) { diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 7f16e1af8d5c..0c76cdd44d90 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -338,9 +338,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - struct ext4_super_block *es; - - es = EXT4_SB(inode->i_sb)->s_es; ext4_error_inode_err(inode, where, line, bh->b_blocknr, EIO, "IO error syncing itable block"); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 031752cfb6f7..f2b577b315a0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3374,8 +3374,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3627,8 +3627,8 @@ static int ext4_split_convert_extents(handle_t *handle, __func__, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b420c9dc444d..4b8c9a9bdf0c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -113,7 +113,7 @@ verified: * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. * - * Return buffer_head of bitmap on success or NULL. + * Return buffer_head of bitmap on success, or an ERR_PTR on error. */ static struct buffer_head * ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) @@ -662,7 +662,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * block has been written back to disk. (Yes, these values are * somewhat arbitrary...) */ -#define RECENTCY_MIN 5 +#define RECENTCY_MIN 60 #define RECENTCY_DIRTY 300 static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e416096fc081..2a4aae6acdcb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1973,7 +1973,7 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - ext4_invalidatepage(page, 0, PAGE_SIZE); + inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return -EIO; } @@ -4364,7 +4364,7 @@ make_io: if (end > table) end = table; while (b <= end) - sb_breadahead(sb, b++); + sb_breadahead_unmovable(sb, b++); } /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 87c85be4c12e..30d5d97548c4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1943,7 +1943,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, int free; free = e4b->bd_info->bb_free; - BUG_ON(free <= 0); + if (WARN_ON(free <= 0)) + return; i = e4b->bd_info->bb_first_free; @@ -1966,7 +1967,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, } mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); - BUG_ON(ex.fe_len <= 0); + if (WARN_ON(ex.fe_len <= 0)) + break; if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9728e7b0e84f..bf5fcb477f66 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -596,7 +596,6 @@ void __ext4_error_file(struct file *file, const char *function, { va_list args; struct va_format vaf; - struct ext4_super_block *es; struct inode *inode = file_inode(file); char pathname[80], *path; @@ -604,7 +603,6 @@ void __ext4_error_file(struct file *file, const char *function, return; trace_ext4_error(inode->i_sb, function, line); - es = EXT4_SB(inode->i_sb)->s_es; if (ext4_error_ratelimit(inode->i_sb)) { path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -4340,7 +4338,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Pre-read the descriptors into the buffer cache */ for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sb_breadahead(sb, block); + sb_breadahead_unmovable(sb, block); } for (i = 0; i < db_count; i++) { diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 08c1580bdf7a..61eec628805d 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -127,31 +127,34 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) static int hfsplus_get_last_session(struct super_block *sb, sector_t *start, sector_t *size) { - struct cdrom_multisession ms_info; - struct cdrom_tocentry te; - int res; + struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); /* default values */ *start = 0; *size = i_size_read(sb->s_bdev->bd_inode) >> 9; if (HFSPLUS_SB(sb)->session >= 0) { + struct cdrom_tocentry te; + + if (!cdi) + return -EINVAL; + te.cdte_track = HFSPLUS_SB(sb)->session; te.cdte_format = CDROM_LBA; - res = ioctl_by_bdev(sb->s_bdev, - CDROMREADTOCENTRY, (unsigned long)&te); - if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { - *start = (sector_t)te.cdte_addr.lba << 2; - return 0; + if (cdrom_read_tocentry(cdi, &te) || + (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) { + pr_err("invalid session number or type of track\n"); + return -EINVAL; } - pr_err("invalid session number or type of track\n"); - return -EINVAL; + *start = (sector_t)te.cdte_addr.lba << 2; + } else if (cdi) { + struct cdrom_multisession ms_info; + + ms_info.addr_format = CDROM_LBA; + if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag) + *start = (sector_t)ms_info.addr.lba << 2; } - ms_info.addr_format = CDROM_LBA; - res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, - (unsigned long)&ms_info); - if (!res && ms_info.xa_flag) - *start = (sector_t)ms_info.addr.lba << 2; + return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 62c0462dc89f..276107cdaaf1 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -544,43 +544,41 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) { - struct cdrom_multisession ms_info; - unsigned int vol_desc_start; - struct block_device *bdev = sb->s_bdev; - int i; + struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); + unsigned int vol_desc_start = 0; - vol_desc_start=0; - ms_info.addr_format=CDROM_LBA; if (session > 0) { - struct cdrom_tocentry Te; - Te.cdte_track=session; - Te.cdte_format=CDROM_LBA; - i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); - if (!i) { + struct cdrom_tocentry te; + + if (!cdi) + return 0; + + te.cdte_track = session; + te.cdte_format = CDROM_LBA; + if (cdrom_read_tocentry(cdi, &te) == 0) { printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n", - session, Te.cdte_addr.lba, - Te.cdte_ctrl&CDROM_DATA_TRACK); - if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) - return Te.cdte_addr.lba; + session, te.cdte_addr.lba, + te.cdte_ctrl & CDROM_DATA_TRACK); + if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4) + return te.cdte_addr.lba; } printk(KERN_ERR "ISOFS: Invalid session number or type of track\n"); } - i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); - if (session > 0) - printk(KERN_ERR "ISOFS: Invalid session number\n"); -#if 0 - printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i); - if (i==0) { - printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); - printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); - } -#endif - if (i==0) + + if (cdi) { + struct cdrom_multisession ms_info; + + ms_info.addr_format = CDROM_LBA; + if (cdrom_multisession(cdi, &ms_info) == 0) { #if WE_OBEY_THE_WRITTEN_STANDARDS - if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ + /* necessary for a valid ms_info.addr */ + if (ms_info.xa_flag) #endif - vol_desc_start=ms_info.addr.lba; + vol_desc_start = ms_info.addr.lba; + } + } + return vol_desc_start; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 6042b646ab27..572898dd16a0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1573,6 +1573,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; + char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ @@ -1584,10 +1585,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, next_line = NULL; } - err = sscanf(pos, "%u %lld %lu", &off->clockid, + err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; + + clock[sizeof(clock) - 1] = 0; + if (strcmp(clock, "monotonic") == 0 || + strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) + off->clockid = CLOCK_MONOTONIC; + else if (strcmp(clock, "boottime") == 0 || + strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) + off->clockid = CLOCK_BOOTTIME; + else + goto out; + noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 5c7ec121990d..f1094cdcd6cd 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -27,41 +27,38 @@ unsigned int udf_get_last_session(struct super_block *sb) { + struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); struct cdrom_multisession ms_info; - unsigned int vol_desc_start; - struct block_device *bdev = sb->s_bdev; - int i; - vol_desc_start = 0; - ms_info.addr_format = CDROM_LBA; - i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); + if (!cdi) { + udf_debug("CDROMMULTISESSION not supported.\n"); + return 0; + } - if (i == 0) { + ms_info.addr_format = CDROM_LBA; + if (cdrom_multisession(cdi, &ms_info) == 0) { udf_debug("XA disk: %s, vol_desc_start=%d\n", ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba); if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ - vol_desc_start = ms_info.addr.lba; - } else { - udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); + return ms_info.addr.lba; } - return vol_desc_start; + return 0; } unsigned long udf_get_last_block(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + struct cdrom_device_info *cdi = disk_to_cdi(bdev->bd_disk); unsigned long lblock = 0; /* - * ioctl failed or returned obviously bogus value? + * The cdrom layer call failed or returned obviously bogus value? * Try using the device size... */ - if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) || - lblock == 0) + if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0) lblock = i_size_read(bdev->bd_inode) >> sb->s_blocksize_bits; if (lblock) return lblock - 1; - else - return 0; + return 0; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a7be7a9e5c1a..8bf1d15be3f6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -911,7 +911,12 @@ xfs_eofblocks_worker( { struct xfs_mount *mp = container_of(to_delayed_work(work), struct xfs_mount, m_eofblocks_work); + + if (!sb_start_write_trylock(mp->m_super)) + return; xfs_icache_free_eofblocks(mp, NULL); + sb_end_write(mp->m_super); + xfs_queue_eofblocks(mp); } @@ -938,7 +943,12 @@ xfs_cowblocks_worker( { struct xfs_mount *mp = container_of(to_delayed_work(work), struct xfs_mount, m_cowblocks_work); + + if (!sb_start_write_trylock(mp->m_super)) + return; xfs_icache_free_cowblocks(mp, NULL); + sb_end_write(mp->m_super); + xfs_queue_cowblocks(mp); } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index cdfb3cd9a25b..309958186d33 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -2363,7 +2363,10 @@ xfs_file_ioctl( if (error) return error; - return xfs_icache_free_eofblocks(mp, &keofb); + sb_start_write(mp->m_super); + error = xfs_icache_free_eofblocks(mp, &keofb); + sb_end_write(mp->m_super); + return error; } default: diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 50c43422fa17..b2e4598fdf7d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -167,8 +167,12 @@ typedef struct xfs_mount { struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ - struct ratelimit_state m_flush_inodes_ratelimit; + /* + * Workqueue item so that we can coalesce multiple inode flush attempts + * into a single flush. + */ + struct work_struct m_flush_inodes_work; struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_cil_workqueue; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index b0ce04ffd3cd..107bf2a2f344 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1051,6 +1051,7 @@ xfs_reflink_remap_extent( uirec.br_startblock = irec->br_startblock + rlen; uirec.br_startoff = irec->br_startoff + rlen; uirec.br_blockcount = unmap_len - rlen; + uirec.br_state = irec->br_state; unmap_len = rlen; /* If this isn't a real mapping, we're done. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index abf06bf9c3f3..424bb9a2d532 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -516,6 +516,20 @@ xfs_destroy_mount_workqueues( destroy_workqueue(mp->m_buf_workqueue); } +static void +xfs_flush_inodes_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, struct xfs_mount, + m_flush_inodes_work); + struct super_block *sb = mp->m_super; + + if (down_read_trylock(&sb->s_umount)) { + sync_inodes_sb(sb); + up_read(&sb->s_umount); + } +} + /* * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting @@ -526,15 +540,15 @@ void xfs_flush_inodes( struct xfs_mount *mp) { - struct super_block *sb = mp->m_super; - - if (!__ratelimit(&mp->m_flush_inodes_ratelimit)) + /* + * If flush_work() returns true then that means we waited for a flush + * which was already in progress. Don't bother running another scan. + */ + if (flush_work(&mp->m_flush_inodes_work)) return; - if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb); - up_read(&sb->s_umount); - } + queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); + flush_work(&mp->m_flush_inodes_work); } /* Catch misguided souls that try to use this interface on XFS */ @@ -1369,17 +1383,6 @@ xfs_fc_fill_super( if (error) goto out_free_names; - /* - * Cap the number of invocations of xfs_flush_inodes to 16 for every - * quarter of a second. The magic numbers here were determined by - * observation neither to cause stalls in writeback when there are a - * lot of IO threads and the fs is near ENOSPC, nor cause any fstest - * regressions. YMMV. - */ - ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16); - ratelimit_set_flags(&mp->m_flush_inodes_ratelimit, - RATELIMIT_MSG_ON_RELEASE); - error = xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; @@ -1752,6 +1755,7 @@ static int xfs_init_fs_context( spin_lock_init(&mp->m_perag_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); + INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); diff --git a/include/linux/bio.h b/include/linux/bio.h index c1c0f9ea4e63..a0ee494a6329 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -319,7 +319,7 @@ struct bio_integrity_payload { struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; - struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ + struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; #if defined(CONFIG_BLK_DEV_INTEGRITY) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 35f8ffe92b70..a57ebe2f00ab 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -607,12 +607,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, u64_stats_update_begin(&bis->sync); /* - * If the bio is flagged with BIO_QUEUE_ENTERED it means this - * is a split bio and we would have already accounted for the - * size of the bio. + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a + * split bio and we would have already accounted for the size of + * the bio. */ - if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { + bio_set_flag(bio, BIO_CGROUP_ACCT); bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + } bis->cur.ios[rwd]++; u64_stats_update_end(&bis->sync); @@ -629,6 +631,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, static inline void blkcg_use_delay(struct blkcg_gq *blkg) { + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; if (atomic_add_return(1, &blkg->use_delay) == 1) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); } @@ -637,6 +641,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); + if (WARN_ON_ONCE(old < 0)) + return 0; if (old == 0) return 0; @@ -661,20 +667,39 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) return 1; } +/** + * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount + * @blkg: target blkg + * @delay: delay duration in nsecs + * + * When enabled with this function, the delay is not decayed and must be + * explicitly cleared with blkcg_clear_delay(). Must not be mixed with + * blkcg_[un]use_delay() and blkcg_add_delay() usages. + */ +static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) +{ + int old = atomic_read(&blkg->use_delay); + + /* We only want 1 person setting the congestion count for this blkg. */ + if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); + + atomic64_set(&blkg->delay_nsec, delay); +} + +/** + * blkcg_clear_delay - Disable allocator delay mechanism + * @blkg: target blkg + * + * Disable use_delay mechanism. See blkcg_set_delay(). + */ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); - if (!old) - return; + /* We only want 1 person clearing the congestion count for this blkg. */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, 0); - if (cur == old) { - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - break; - } - old = cur; - } + if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f389d7c724bd..d7307795439a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -173,7 +173,7 @@ struct blk_mq_hw_ctx { * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also * blk_mq_hw_ctx_size(). */ - struct srcu_struct srcu[0]; + struct srcu_struct srcu[]; }; /** @@ -508,6 +508,7 @@ void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); +void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); @@ -577,4 +578,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } +blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio); + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 70254ae11769..90895d594e64 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -198,7 +198,7 @@ struct bio { * double allocations for a small number of bio_vecs. This member * MUST obviously be kept at the very end of the bio. */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) @@ -220,7 +220,7 @@ enum { * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ - BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */ + BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_TRACKED, /* set if bio goes through the rq_qos path */ BIO_FLAG_LAST }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32868fbedc9e..f00bd4042295 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,8 +82,6 @@ typedef __u32 __bitwise req_flags_t; /* set for "ide_preempt" requests and also for requests for which the SCSI "quiesce" state must be ignored. */ #define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) -/* contains copies of user pages */ -#define RQF_COPY_USER ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -226,8 +224,6 @@ struct request { unsigned short write_hint; unsigned short ioprio; - unsigned int extra_len; /* length of alignment and padding */ - enum mq_rq_state state; refcount_t ref; @@ -290,7 +286,6 @@ struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); struct bio_vec; -typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -399,7 +394,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -469,8 +463,6 @@ struct request_queue { */ unsigned long nr_requests; /* Max # of requests */ - unsigned int dma_drain_size; - void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; @@ -1099,9 +1091,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); -extern int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); @@ -1138,7 +1127,15 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) return max_t(unsigned short, rq->nr_phys_segments, 1); } -extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *last_sg = NULL; + + return __blk_rq_map_sg(q, rq, sglist, &last_sg); +} extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e0b020eaf32e..15b765a181b8 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,6 +189,8 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); +void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, + gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -319,6 +321,12 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline void +sb_breadahead_unmovable(struct super_block *sb, sector_t block) +{ + __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index 511a37302fea..5fd627e9da19 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -189,7 +189,7 @@ struct __packed pucan_rx_msg { u8 client; __le16 flags; __le32 can_id; - u8 d[0]; + u8 d[]; }; /* uCAN error types */ @@ -266,7 +266,7 @@ struct __packed pucan_tx_msg { u8 client; __le16 flags; __le32 can_id; - u8 d[0]; + u8 d[]; }; /* build the cmd opcode_channel field with respect to the correct endianness */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 528271c60018..8543fa59da72 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -94,6 +94,11 @@ struct cdrom_device_ops { struct packet_command *); }; +int cdrom_multisession(struct cdrom_device_info *cdi, + struct cdrom_multisession *info); +int cdrom_read_tocentry(struct cdrom_device_info *cdi, + struct cdrom_tocentry *entry); + /* the general block_device operations structure: */ extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode); @@ -104,7 +109,7 @@ extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); extern int cdrom_media_changed(struct cdrom_device_info *); -extern int register_cdrom(struct cdrom_device_info *cdi); +extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi); extern void unregister_cdrom(struct cdrom_device_info *cdi); typedef struct { diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index 02edeafcb2bf..be8aea04d023 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -28,7 +28,7 @@ struct cpu_rmap { struct { u16 index; u16 dist; - } near[0]; + } near[]; }; #define CPU_RMAP_DIST_INF 0xffff diff --git a/include/linux/digsig.h b/include/linux/digsig.h index 594fc66a395a..2ace69e41088 100644 --- a/include/linux/digsig.h +++ b/include/linux/digsig.h @@ -29,7 +29,7 @@ struct pubkey_hdr { uint32_t timestamp; /* key made, always 0 for now */ uint8_t algo; uint8_t nmpi; - char mpi[0]; + char mpi[]; } __packed; struct signature_hdr { @@ -39,7 +39,7 @@ struct signature_hdr { uint8_t hash; uint8_t keyid[8]; uint8_t nmpi; - char mpi[0]; + char mpi[]; } __packed; #if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE) diff --git a/include/linux/dirent.h b/include/linux/dirent.h index fc61f3cff72f..99002220cd45 100644 --- a/include/linux/dirent.h +++ b/include/linux/dirent.h @@ -7,7 +7,7 @@ struct linux_dirent64 { s64 d_off; unsigned short d_reclen; unsigned char d_type; - char d_name[0]; + char d_name[]; }; #endif diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 564e96f625ff..1c630e2c2756 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -101,7 +101,7 @@ struct enclosure_device { struct device edev; struct enclosure_component_callbacks *cb; int components; - struct enclosure_component component[0]; + struct enclosure_component component[]; }; static inline struct enclosure_device * diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index d249b88a4d5a..ade6486a3382 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -36,7 +36,7 @@ struct em_cap_state { struct em_perf_domain { struct em_cap_state *table; int nr_cap_states; - unsigned long cpus[0]; + unsigned long cpus[]; }; #ifdef CONFIG_ENERGY_MODEL diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c1d379bf6ee1..a23b26eab479 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -35,7 +35,7 @@ struct compat_ethtool_rxnfc { compat_u64 data; struct compat_ethtool_rx_flow_spec fs; u32 rule_cnt; - u32 rule_locs[0]; + u32 rule_locs[]; }; #endif /* CONFIG_COMPAT */ @@ -462,7 +462,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops); struct ethtool_rx_flow_rule { struct flow_rule *rule; - unsigned long priv[0]; + unsigned long priv[]; }; struct ethtool_rx_flow_spec_input { diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..1a95e5158811 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2581,7 +2581,6 @@ extern struct kmem_cache *names_cachep; #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); -extern void bdev_unhash_inode(dev_t dev); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); @@ -2723,7 +2722,6 @@ extern bool is_bad_inode(struct inode *); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *, bool); -extern int invalidate_partition(struct gendisk *, int); #endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5b14a0f38124..0bd581003cd5 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -76,7 +76,7 @@ struct gen_pool_chunk { void *owner; /* private data to retrieve at alloc time */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ - unsigned long bits[0]; /* bitmap for allocating memory chunk */ + unsigned long bits[]; /* bitmap for allocating memory chunk */ }; /* diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9b3fffdf4011..f9c226f9546a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -217,11 +217,20 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif int node_id; struct badblocks *bb; struct lockdep_map lockdep_map; }; +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) { @@ -339,7 +348,7 @@ extern dev_t blk_lookup_devt(const char *name, int partno); int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); -int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); +int blk_drop_partitions(struct block_device *bdev); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 456fc17ecb1c..45d36ba4826b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -461,12 +461,6 @@ i2c_new_scanned_device(struct i2c_adapter *adap, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); -struct i2c_client * -i2c_new_probed_device(struct i2c_adapter *adap, - struct i2c_board_info *info, - unsigned short const *addr_list, - int (*probe)(struct i2c_adapter *adap, unsigned short addr)); - /* Common custom probe functions */ int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 463047d0190b..faa6586a5783 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -38,7 +38,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[0]; + __be32 sl_addr[]; }; #define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 98cb5ce0b0a0..b824877e6d1b 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -18,7 +18,7 @@ struct ihex_binrec { __be32 addr; __be16 len; - uint8_t data[0]; + uint8_t data[]; } __attribute__((packed)); static inline uint16_t ihex_binrec_size(const struct ihex_binrec *p) diff --git a/include/linux/irq.h b/include/linux/irq.h index 9315fbb87db3..8d5bc2c237d7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -573,8 +573,6 @@ enum { #define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS struct irqaction; -extern int setup_irq(unsigned int irq, struct irqaction *new); -extern void remove_irq(unsigned int irq, struct irqaction *act); extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); @@ -1043,7 +1041,7 @@ struct irq_chip_generic { unsigned long unused; struct irq_domain *domain; struct list_head list; - struct irq_chip_type chip_types[0]; + struct irq_chip_type chip_types[]; }; /** @@ -1079,7 +1077,7 @@ struct irq_domain_chip_generic { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; - struct irq_chip_generic *gc[0]; + struct irq_chip_generic *gc[]; }; /* Generic chip callback functions */ diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 765d9b769b69..6c36b6cc3edf 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -243,6 +243,7 @@ #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) #define GICR_TYPER_DirectLPIS (1U << 3) #define GICR_TYPER_LAST (1U << 4) #define GICR_TYPER_RVPEID (1U << 7) @@ -686,6 +687,7 @@ struct rdists { bool has_vlpis; bool has_rvpeid; bool has_direct_lpi; + bool has_vpend_valid_dirty; }; struct irq_domain; diff --git a/include/linux/libata.h b/include/linux/libata.h index cffa4714bfa8..af832852e620 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,6 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +bool ata_scsi_dma_need_drain(struct request *rq); extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); @@ -1387,6 +1388,7 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .ioctl = ata_scsi_ioctl, \ ATA_SCSI_COMPAT_IOCTL \ .queuecommand = ata_scsi_queuecmd, \ + .dma_need_drain = ata_scsi_dma_need_drain, \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index d5ceb2839a2d..9dcaa3e582c9 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -34,7 +34,7 @@ struct list_lru_one { struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_one *lru[0]; + struct list_lru_one *lru[]; }; struct list_lru_node { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1b4150ff64be..d275c72c4f8e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -106,7 +106,7 @@ struct lruvec_stat { */ struct memcg_shrinker_map { struct rcu_head rcu; - unsigned long map[0]; + unsigned long map[]; }; /* @@ -148,7 +148,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[0]; + struct mem_cgroup_threshold entries[]; }; struct mem_cgroup_thresholds { diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index 25f46a939637..3e268e636b5b 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -83,7 +83,7 @@ struct wilco_ec_response { u16 result; u16 data_size; u8 reserved[2]; - u8 data[0]; + u8 data[]; } __packed; /** diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 540595a321a7..90797f1b421d 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -28,7 +28,7 @@ struct posix_acl { refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; - struct posix_acl_entry a_entries[0]; + struct posix_acl_entry a_entries[]; }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ diff --git a/include/linux/rio.h b/include/linux/rio.h index 317bace5ac64..2cd637268b4f 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -100,7 +100,7 @@ struct rio_switch { u32 port_ok; struct rio_switch_ops *ops; spinlock_t lock; - struct rio_dev *nextdev[0]; + struct rio_dev *nextdev[]; }; /** @@ -201,7 +201,7 @@ struct rio_dev { u8 hopcount; struct rio_dev *prev; atomic_t state; - struct rio_switch rswitch[0]; /* RIO switch info */ + struct rio_switch rswitch[]; /* RIO switch info */ }; #define rio_dev_g(n) list_entry(n, struct rio_dev, global_list) diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 5974cedd008c..238bb85243d3 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -54,7 +54,7 @@ struct rs_codec { */ struct rs_control { struct rs_codec *codec; - uint16_t buffers[0]; + uint16_t buffers[]; }; /* General purpose RS codec, 8-bit data width, symbol width 1-15 bit */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index af9319e4cfb9..95253ad792b0 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -142,7 +142,7 @@ struct sched_domain { * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) */ - unsigned long span[0]; + unsigned long span[]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a2ac7072dbb..3000c526f552 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4162,7 +4162,7 @@ struct skb_ext { refcount_t refcnt; u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ u8 chunks; /* same */ - char data[0] __aligned(8); + char data[] __aligned(8); }; struct skb_ext *__skb_ext_alloc(void); diff --git a/include/linux/swap.h b/include/linux/swap.h index b835d8dbea0e..e1bbf7a16b27 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -275,7 +275,7 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ - struct plist_node avail_lists[0]; /* + struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index eb6cbdf10e50..44a7f9169ac6 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -295,7 +295,7 @@ struct bts_header { u32 magic; u32 version; u8 future[24]; - u8 actions[0]; + u8 actions[]; } __attribute__ ((packed)); /** @@ -305,7 +305,7 @@ struct bts_header { struct bts_action { u16 type; u16 size; - u8 data[0]; + u8 data[]; } __attribute__ ((packed)); struct bts_action_send { @@ -315,7 +315,7 @@ struct bts_action_send { struct bts_action_wait { u32 msec; u32 size; - u8 data[0]; + u8 data[]; } __attribute__ ((packed)); struct bts_action_delay { diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 131ea1bad458..c253461b1c4e 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -28,7 +28,7 @@ struct tcpa_event { u32 event_type; u8 pcr_value[20]; /* SHA1 */ u32 event_size; - u8 event_data[0]; + u8 event_data[]; }; enum tcpa_event_types { @@ -55,7 +55,7 @@ enum tcpa_event_types { struct tcpa_pc_event { u32 event_id; u32 event_size; - u8 event_data[0]; + u8 event_data[]; }; enum tcpa_pc_event_ids { @@ -102,7 +102,7 @@ struct tcg_pcr_event { struct tcg_event_field { u32 event_size; - u8 event[0]; + u8 event[]; } __packed; struct tcg_pcr_event2_head { diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4cf6e11f4a3c..47eaa34f8761 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -73,7 +73,7 @@ struct simple_xattr { struct list_head list; char *name; size_t size; - char value[0]; + char value[]; }; /* diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 80ac89e47b47..f93c0b800790 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -142,6 +142,7 @@ struct scsi_cmnd { unsigned long state; /* Command completion state */ unsigned char tag; /* SCSI-II queued command tag */ + unsigned int extra_len; /* length of alignment and padding */ }; /* diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c3cba2aaf934..bc5909033d13 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -229,6 +229,9 @@ struct scsi_device { struct scsi_device_handler *handler; void *handler_data; + size_t dma_drain_len; + void *dma_drain_buf; + unsigned char access_state; struct mutex state_mutex; enum scsi_device_state sdev_state; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 822e8cda8d9b..46ef8cccc982 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -271,6 +271,13 @@ struct scsi_host_template { int (* map_queues)(struct Scsi_Host *shost); /* + * Check if scatterlists need to be padded for DMA draining. + * + * Status: OPTIONAL + */ + bool (* dma_need_drain)(struct request *rq); + + /* * This function determines the BIOS parameters for a given * harddisk. These tend to be numbers that are made up by * the host adapter. Parameters: diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index f880d2831160..e83954c69fff 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -45,13 +45,13 @@ struct dlm_lock_params { void __user *bastaddr; struct dlm_lksb __user *lksb; char lvb[DLM_USER_LVB_LEN]; - char name[0]; + char name[]; }; struct dlm_lspace_params { __u32 flags; __u32 minor; - char name[0]; + char name[]; }; struct dlm_purge_params { diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 8c0bc24d5d95..7a900b2377b6 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -34,7 +34,7 @@ struct fiemap { __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ __u32 fm_extent_count; /* size of fm_extents array (in) */ __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ + struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ }; #define FIEMAP_MAX_OFFSET (~0ULL) diff --git a/kernel/fork.c b/kernel/fork.c index 4385f3d639f2..8c700f881d92 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2605,6 +2605,14 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args args; pid_t *kset_tid = kargs->set_tid; + BUILD_BUG_ON(offsetofend(struct clone_args, tls) != + CLONE_ARGS_SIZE_VER0); + BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) != + CLONE_ARGS_SIZE_VER1); + BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != + CLONE_ARGS_SIZE_VER2); + BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) @@ -2631,7 +2639,8 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, !valid_signal(args.exit_signal))) return -EINVAL; - if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup < 0) + if ((args.flags & CLONE_INTO_CGROUP) && + (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2)) return -EINVAL; *kargs = (struct kernel_clone_args){ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index fe40c658f86f..453a8a0f4804 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1690,34 +1690,6 @@ out_mput: return ret; } -/** - * setup_irq - setup an interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt - * - * Used to statically setup interrupts in the early boot process. - */ -int setup_irq(unsigned int irq, struct irqaction *act) -{ - int retval; - struct irq_desc *desc = irq_to_desc(irq); - - if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) - return -EINVAL; - - retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) - return retval; - - retval = __setup_irq(irq, desc, act); - - if (retval) - irq_chip_pm_put(&desc->irq_data); - - return retval; -} -EXPORT_SYMBOL_GPL(setup_irq); - /* * Internal function to unregister an irqaction - used to free * regular and special interrupts that are part of the architecture. @@ -1859,22 +1831,6 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) } /** - * remove_irq - free an interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt - * - * Used to remove interrupts statically setup by the early boot process. - */ -void remove_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) - __free_irq(desc, act->dev_id); -} -EXPORT_SYMBOL_GPL(remove_irq); - -/** * free_irq - free an interrupt allocated with request_irq * @irq: Interrupt line to free * @dev_id: Device identity to free diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 06548e2ebb72..d9a49cd6065a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -825,7 +825,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq) rcu_cleanup_after_idle(); incby = 1; - } else if (tick_nohz_full_cpu(rdp->cpu) && + } else if (irq && tick_nohz_full_cpu(rdp->cpu) && rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE && READ_ONCE(rdp->rcu_urgent_qs) && !READ_ONCE(rdp->rcu_forced_tick)) { diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index dac9104d126f..ff9435dee1df 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -1003,12 +1003,12 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat, enum cpu_usage_stat usage, int cpu) { u64 *cpustat = kcpustat->cpustat; + u64 val = cpustat[usage]; struct rq *rq; - u64 val; int err; if (!vtime_accounting_enabled_cpu(cpu)) - return cpustat[usage]; + return val; rq = cpu_rq(cpu); diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 008d6ac2342b..808244f3ddd9 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -149,6 +149,9 @@ __setup("nohz_full=", housekeeping_nohz_full_setup); static int __init housekeeping_isolcpus_setup(char *str) { unsigned int flags = 0; + bool illegal = false; + char *par; + int len; while (isalpha(*str)) { if (!strncmp(str, "nohz,", 5)) { @@ -169,8 +172,22 @@ static int __init housekeeping_isolcpus_setup(char *str) continue; } - pr_warn("isolcpus: Error, unknown flag\n"); - return 0; + /* + * Skip unknown sub-parameter and validate that it is not + * containing an invalid character. + */ + for (par = str, len = 0; *str && *str != ','; str++, len++) { + if (!isalpha(*str) && *str != '_') + illegal = true; + } + + if (illegal) { + pr_warn("isolcpus: Invalid flag %.*s\n", len, par); + return 0; + } + + pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par); + str++; } /* Default behaviour for isolcpus without flags */ diff --git a/kernel/signal.c b/kernel/signal.c index e58a6c619824..713104884414 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1510,15 +1510,15 @@ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, unsigned long flags; int ret = -EINVAL; + if (!valid_signal(sig)) + return ret; + clear_siginfo(&info); info.si_signo = sig; info.si_errno = errno; info.si_code = SI_ASYNCIO; *((sigval_t *)&info.si_pid) = addr; - if (!valid_signal(sig)) - return ret; - rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (!p) { @@ -1557,12 +1557,8 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid) { int ret; - if (pid > 0) { - rcu_read_lock(); - ret = kill_pid_info(sig, info, find_vpid(pid)); - rcu_read_unlock(); - return ret; - } + if (pid > 0) + return kill_proc_info(sig, info, pid); /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */ if (pid == INT_MIN) diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 3b30288793fe..53bce347cd50 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -338,7 +338,20 @@ static struct user_namespace *timens_owner(struct ns_common *ns) static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts) { - seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec); + char *clock; + + switch (clockid) { + case CLOCK_BOOTTIME: + clock = "boottime"; + break; + case CLOCK_MONOTONIC: + clock = "monotonic"; + break; + default: + clock = "unknown"; + break; + } + seq_printf(m, "%-10s %10lld %9ld\n", clock, ts->tv_sec, ts->tv_nsec); } void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m) diff --git a/mm/mremap.c b/mm/mremap.c index a7e282ead438..c881abeba0bf 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -413,9 +413,20 @@ static unsigned long move_vma(struct vm_area_struct *vma, /* Always put back VM_ACCOUNT since we won't unmap */ vma->vm_flags |= VM_ACCOUNT; - vm_acct_memory(vma_pages(new_vma)); + vm_acct_memory(new_len >> PAGE_SHIFT); } + /* + * VMAs can actually be merged back together in copy_vma + * calling merge_vma. This can happen with anonymous vmas + * which have not yet been faulted, so if we were to consider + * this VMA split we'll end up adding VM_ACCOUNT on the + * next VMA, which is completely unrelated if this VMA + * was re-merged. + */ + if (split && new_vma == vma) + split = 0; + /* We always clear VM_LOCKED[ONFAULT] on the old vma */ vma->vm_flags &= VM_LOCKED_CLEAR_MASK; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f3327cb56edf..db189945e9b0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -217,7 +217,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -285,6 +285,7 @@ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -299,6 +300,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ @@ -367,6 +369,7 @@ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index d5e517d1c3dd..12c9684d59ba 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -41,6 +41,10 @@ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_TEST_CTRL 0x00000033 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT) + #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ @@ -70,6 +74,11 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ +#define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index 9d8e9613008a..3c21de88af9e 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -28,7 +28,8 @@ parser.add_argument('devname', metavar='DEV', parser.add_argument('--cgroup', action='append', metavar='REGEX', help='Regex for target cgroups, ') parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1, - help='Monitoring interval in seconds') + help='Monitoring interval in seconds (0 exits immediately ' + 'after checking requirements)') parser.add_argument('--json', action='store_true', help='Output in json') args = parser.parse_args() @@ -112,14 +113,14 @@ class IocStat: def dict(self, now): return { 'device' : devname, - 'timestamp' : str(now), - 'enabled' : str(int(self.enabled)), - 'running' : str(int(self.running)), - 'period_ms' : str(self.period_ms), - 'period_at' : str(self.period_at), - 'period_vtime_at' : str(self.vperiod_at), - 'busy_level' : str(self.busy_level), - 'vrate_pct' : str(self.vrate_pct), } + 'timestamp' : now, + 'enabled' : self.enabled, + 'running' : self.running, + 'period_ms' : self.period_ms, + 'period_at' : self.period_at, + 'period_vtime_at' : self.vperiod_at, + 'busy_level' : self.busy_level, + 'vrate_pct' : self.vrate_pct, } def table_preamble_str(self): state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF' @@ -179,19 +180,19 @@ class IocgStat: def dict(self, now, path): out = { 'cgroup' : path, - 'timestamp' : str(now), - 'is_active' : str(int(self.is_active)), - 'weight' : str(self.weight), - 'weight_active' : str(self.active), - 'weight_inuse' : str(self.inuse), - 'hweight_active_pct' : str(self.hwa_pct), - 'hweight_inuse_pct' : str(self.hwi_pct), - 'inflight_pct' : str(self.inflight_pct), - 'debt_ms' : str(self.debt_ms), - 'use_delay' : str(self.use_delay), - 'delay_ms' : str(self.delay_ms), - 'usage_pct' : str(self.usage), - 'address' : str(hex(self.address)) } + 'timestamp' : now, + 'is_active' : self.is_active, + 'weight' : self.weight, + 'weight_active' : self.active, + 'weight_inuse' : self.inuse, + 'hweight_active_pct' : self.hwa_pct, + 'hweight_inuse_pct' : self.hwi_pct, + 'inflight_pct' : self.inflight_pct, + 'debt_ms' : self.debt_ms, + 'use_delay' : self.use_delay, + 'delay_ms' : self.delay_ms, + 'usage_pct' : self.usage, + 'address' : self.address } for i in range(len(self.usages)): out[f'usage_pct_{i}'] = str(self.usages[i]) return out @@ -248,6 +249,9 @@ for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()): if ioc is None: err(f'Could not find ioc for {devname}'); +if interval == 0: + sys.exit(0) + # Keep printing while True: now = time.time() diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 669d69441a62..4671fbf28842 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -3,9 +3,9 @@ #define __LINUX_BITS_H #include <linux/const.h> +#include <vdso/bits.h> #include <asm/bitsperlong.h> -#define BIT(nr) (UL(1) << (nr)) #define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -18,12 +18,30 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) \ +#if !defined(__ASSEMBLY__) && \ + (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#include <linux/build_bug.h> +#define GENMASK_INPUT_CHECK(h, l) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __builtin_constant_p((l) > (h)), (l) > (h), 0))) +#else +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK_INPUT_CHECK(h, l) 0 +#endif + +#define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ +#define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h new file mode 100644 index 000000000000..cc7070c7439b --- /dev/null +++ b/tools/include/linux/build_bug.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUILD_BUG_H +#define _LINUX_BUILD_BUG_H + +#include <linux/compiler.h> + +#ifdef __CHECKER__ +#define BUILD_BUG_ON_ZERO(e) (0) +#else /* __CHECKER__ */ +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type int), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ +#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) +#endif /* __CHECKER__ */ + +/* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + +/* + * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the + * expression but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + +/** + * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied + * error message. + * @condition: the condition which the compiler should know is false. + * + * See BUILD_BUG_ON for description. + */ +#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + */ +#define BUILD_BUG_ON(condition) \ + BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) + +/** + * BUILD_BUG - break compile if used. + * + * If you have some code that you expect the compiler to eliminate at + * build time, you should use BUILD_BUG to detect if it is + * unexpectedly used. + */ +#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") + +/** + * static_assert - check integer constant expression at build time + * + * static_assert() is a wrapper for the C11 _Static_assert, with a + * little macro magic to make the message optional (defaulting to the + * stringification of the tested expression). + * + * Contrary to BUILD_BUG_ON(), static_assert() can be used at global + * scope, but requires the expression to be an integer constant + * expression (i.e., it is not enough that __builtin_constant_p() is + * true for expr). + * + * Also note that BUILD_BUG_ON() fails the build if the condition is + * true, while static_assert() fails the build if the expression is + * false. + */ +#ifndef static_assert +#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) +#define __static_assert(expr, msg, ...) _Static_assert(expr, msg) +#endif // static_assert + +#endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 1827c2f973f9..180f7714a5f1 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -10,6 +10,32 @@ # define __compiletime_error(message) #endif +#ifdef __OPTIMIZE__ +# define __compiletime_assert(condition, msg, prefix, suffix) \ + do { \ + extern void prefix ## suffix(void) __compiletime_error(msg); \ + if (!(condition)) \ + prefix ## suffix(); \ + } while (0) +#else +# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +#endif + +#define _compiletime_assert(condition, msg, prefix, suffix) \ + __compiletime_assert(condition, msg, prefix, suffix) + +/** + * compiletime_assert - break build and emit msg if condition is false + * @condition: a compile-time constant condition to check + * @msg: a message to emit if condition is false + * + * In tradition of POSIX assert, this macro will break the build if the + * supplied condition is *false*, emitting the supplied error message if the + * compiler has support to do so. + */ +#define compiletime_assert(condition, msg) \ + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) + /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 7b55a55f5911..81b8aae5a855 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -1,9 +1,6 @@ #ifndef _LINUX_CONST_H #define _LINUX_CONST_H -#include <uapi/linux/const.h> - -#define UL(x) (_UL(x)) -#define ULL(x) (_ULL(x)) +#include <vdso/const.h> #endif /* _LINUX_CONST_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index cba226948a0c..a7e54a08fb54 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -5,6 +5,7 @@ #include <stdarg.h> #include <stddef.h> #include <assert.h> +#include <linux/build_bug.h> #include <linux/compiler.h> #include <endian.h> #include <byteswap.h> @@ -35,9 +36,6 @@ (type *)((char *)__mptr - offsetof(type, member)); }) #endif -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 868bf7996c0f..808b48a93330 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -948,6 +948,8 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 829c0a48577f..2813e579b480 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -1619,6 +1619,27 @@ struct drm_i915_gem_context_param { * By default, new contexts allow persistence. */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb + +/* + * I915_CONTEXT_PARAM_RINGSIZE: + * + * Sets the size of the CS ringbuffer to use for logical ring contexts. This + * applies a limit of how many batches can be queued to HW before the caller + * is blocked due to lack of space for more commands. + * + * Only reliably possible to be set prior to first use, i.e. during + * construction. At any later point, the current execution must be flushed as + * the ring can only be changed while the context is idle. Note, the ringsize + * can be specified as a constructor property, see + * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. + * + * Only applies to the current set of engine and lost when those engines + * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). + * + * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. + * Default is 16 KiB. + */ +#define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ __u64 value; diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 0d8a6f47711c..a10e3cdc2839 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -163,6 +163,7 @@ struct fscrypt_get_key_status_arg { #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) #define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) +#define FS_IOC_GET_ENCRYPTION_NONCE _IOR('f', 27, __u8[16]) /**********************************************************************/ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 4b95f9a31a2f..428c7dde6b4b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -474,12 +474,17 @@ struct kvm_s390_mem_op { __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ - __u8 ar; /* the access register number */ - __u8 reserved[31]; /* should be set to 0 */ + union { + __u8 ar; /* the access register number */ + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* should be set to 0 */ + }; }; /* types for kvm_s390_mem_op->op */ #define KVM_S390_MEMOP_LOGICAL_READ 0 #define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) @@ -1010,6 +1015,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_NISV_TO_USER 177 #define KVM_CAP_ARM_INJECT_EXT_DABT 178 #define KVM_CAP_S390_VCPU_RESETS 179 +#define KVM_CAP_S390_PROTECTED 180 +#define KVM_CAP_PPC_SECURE_GUEST 181 #ifdef KVM_CAP_IRQ_ROUTING @@ -1478,6 +1485,39 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +/* Available with KVM_CAP_S390_PROTECTED */ +#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1628,4 +1668,7 @@ struct kvm_hyperv_eventfd { #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) +#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index fc1a64c3447b..923cc162609c 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -5,8 +5,9 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index 2e3bc22c6f20..3bac0a8ceab2 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -35,6 +35,7 @@ /* Flags for the clone3() syscall. */ #define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ +#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ /* * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 @@ -81,6 +82,8 @@ * @set_tid_size: This defines the size of the array referenced * in @set_tid. This cannot be larger than the * kernel's limit of nested PID namespaces. + * @cgroup: If CLONE_INTO_CGROUP is specified set this to + * a file descriptor for the cgroup. * * The structure is versioned by size and thus extensible. * New struct members must go at the end of the struct and @@ -97,11 +100,13 @@ struct clone_args { __aligned_u64 tls; __aligned_u64 set_tid; __aligned_u64 set_tid_size; + __aligned_u64 cgroup; }; #endif #define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ #define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ /* * Scheduling policies diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 40d028eed645..9fe72e4b1373 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -116,4 +116,28 @@ #define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) #define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) +/* VHOST_VDPA specific defines */ + +/* Get the device id. The device ids follow the same definition of + * the device id defined in virtio-spec. + */ +#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32) +/* Get and set the status. The status bits follow the same definition + * of the device status defined in virtio-spec. + */ +#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8) +#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) +/* Get and set the device config. The device config follows the same + * definition of the device config defined in virtio-spec. + */ +#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \ + struct vhost_vdpa_config) +#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \ + struct vhost_vdpa_config) +/* Enable/disable the ring. */ +#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ + struct vhost_vring_state) +/* Get the max ring size. */ +#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) + #endif diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h new file mode 100644 index 000000000000..6d005a1f5d94 --- /dev/null +++ b/tools/include/vdso/bits.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_BITS_H +#define __VDSO_BITS_H + +#include <vdso/const.h> + +#define BIT(nr) (UL(1) << (nr)) + +#endif /* __VDSO_BITS_H */ diff --git a/tools/include/vdso/const.h b/tools/include/vdso/const.h new file mode 100644 index 000000000000..94b385ad438d --- /dev/null +++ b/tools/include/vdso/const.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_CONST_H +#define __VDSO_CONST_H + +#include <uapi/linux/const.h> + +#define UL(x) (_UL(x)) +#define ULL(x) (_ULL(x)) + +#endif /* __VDSO_CONST_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8dd01f986fbb..4b170fd08a28 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1050,10 +1050,7 @@ static struct rela *find_jump_table(struct objtool_file *file, * it. */ for (; - &insn->list != &file->insn_list && - insn->sec == func->sec && - insn->offset >= func->offset; - + &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) @@ -2008,8 +2005,8 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct } if (state->bp_scratch) { - WARN("%s uses BP as a scratch register", - func->name); + WARN_FUNC("BP used as a scratch register", + insn->sec, insn->offset); return 1; } @@ -2364,14 +2361,27 @@ static bool ignore_unreachable_insn(struct instruction *insn) !strcmp(insn->sec->name, ".altinstr_aux")) return true; + if (!insn->func) + return false; + + /* + * CONFIG_UBSAN_TRAP inserts a UD2 when it sees + * __builtin_unreachable(). The BUG() macro has an unreachable() after + * the UD2, which causes GCC's undefined trap logic to emit another UD2 + * (or occasionally a JMP to UD2). + */ + if (list_prev_entry(insn, list)->dead_end && + (insn->type == INSN_BUG || + (insn->type == INSN_JUMP_UNCONDITIONAL && + insn->jump_dest && insn->jump_dest->type == INSN_BUG))) + return true; + /* * Check if this (or a subsequent) instruction is related to * CONFIG_UBSAN or CONFIG_KASAN. * * End the search at 5 instructions to avoid going into the weeds. */ - if (!insn->func) - return false; for (i = 0; i < 5; i++) { if (is_kasan_insn(insn) || is_ubsan_insn(insn)) diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 13ccf775a83a..ba4cbb1cdd63 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -66,7 +66,7 @@ int orc_dump(const char *_objname) char *name; size_t nr_sections; Elf64_Addr orc_ip_addr = 0; - size_t shstrtab_idx; + size_t shstrtab_idx, strtab_idx = 0; Elf *elf; Elf_Scn *scn; GElf_Shdr sh; @@ -127,6 +127,8 @@ int orc_dump(const char *_objname) if (!strcmp(name, ".symtab")) { symtab = data; + } else if (!strcmp(name, ".strtab")) { + strtab_idx = i; } else if (!strcmp(name, ".orc_unwind")) { orc = data->d_buf; orc_size = sh.sh_size; @@ -138,7 +140,7 @@ int orc_dump(const char *_objname) } } - if (!symtab || !orc || !orc_ip) + if (!symtab || !strtab_idx || !orc || !orc_ip) return 0; if (orc_size % sizeof(*orc) != 0) { @@ -159,21 +161,29 @@ int orc_dump(const char *_objname) return -1; } - scn = elf_getscn(elf, sym.st_shndx); - if (!scn) { - WARN_ELF("elf_getscn"); - return -1; - } - - if (!gelf_getshdr(scn, &sh)) { - WARN_ELF("gelf_getshdr"); - return -1; - } - - name = elf_strptr(elf, shstrtab_idx, sh.sh_name); - if (!name || !*name) { - WARN_ELF("elf_strptr"); - return -1; + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) { + scn = elf_getscn(elf, sym.st_shndx); + if (!scn) { + WARN_ELF("elf_getscn"); + return -1; + } + + if (!gelf_getshdr(scn, &sh)) { + WARN_ELF("gelf_getshdr"); + return -1; + } + + name = elf_strptr(elf, shstrtab_idx, sh.sh_name); + if (!name) { + WARN_ELF("elf_strptr"); + return -1; + } + } else { + name = elf_strptr(elf, strtab_idx, sym.st_name); + if (!name) { + WARN_ELF("elf_strptr"); + return -1; + } } printf("%s+%llx:", name, (unsigned long long)rela.r_addend); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 41e4a2754da4..4c0dabd28000 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -88,11 +88,6 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti struct orc_entry *orc; struct rela *rela; - if (!insn_sec->sym) { - WARN("missing symbol for section %s", insn_sec->name); - return -1; - } - /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); @@ -105,8 +100,32 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti } memset(rela, 0, sizeof(*rela)); - rela->sym = insn_sec->sym; - rela->addend = insn_off; + if (insn_sec->sym) { + rela->sym = insn_sec->sym; + rela->addend = insn_off; + } else { + /* + * The Clang assembler doesn't produce section symbols, so we + * have to reference the function symbol instead: + */ + rela->sym = find_symbol_containing(insn_sec, insn_off); + if (!rela->sym) { + /* + * Hack alert. This happens when we need to reference + * the NOP pad insn immediately after the function. + */ + rela->sym = find_symbol_containing(insn_sec, + insn_off - 1); + } + if (!rela->sym) { + WARN("missing symbol for insn at offset 0x%lx\n", + insn_off); + return -1; + } + + rela->addend = insn_off - rela->sym->offset; + } + rela->type = R_X86_64_PC32; rela->offset = idx * sizeof(int); rela->sec = ip_relasec; diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 44d510bc9b78..37b844f839bc 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -8,357 +8,357 @@ # # The abi is "common", "64" or "x32" for this file. # -0 common read __x64_sys_read -1 common write __x64_sys_write -2 common open __x64_sys_open -3 common close __x64_sys_close -4 common stat __x64_sys_newstat -5 common fstat __x64_sys_newfstat -6 common lstat __x64_sys_newlstat -7 common poll __x64_sys_poll -8 common lseek __x64_sys_lseek -9 common mmap __x64_sys_mmap -10 common mprotect __x64_sys_mprotect -11 common munmap __x64_sys_munmap -12 common brk __x64_sys_brk -13 64 rt_sigaction __x64_sys_rt_sigaction -14 common rt_sigprocmask __x64_sys_rt_sigprocmask -15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs -16 64 ioctl __x64_sys_ioctl -17 common pread64 __x64_sys_pread64 -18 common pwrite64 __x64_sys_pwrite64 -19 64 readv __x64_sys_readv -20 64 writev __x64_sys_writev -21 common access __x64_sys_access -22 common pipe __x64_sys_pipe -23 common select __x64_sys_select -24 common sched_yield __x64_sys_sched_yield -25 common mremap __x64_sys_mremap -26 common msync __x64_sys_msync -27 common mincore __x64_sys_mincore -28 common madvise __x64_sys_madvise -29 common shmget __x64_sys_shmget -30 common shmat __x64_sys_shmat -31 common shmctl __x64_sys_shmctl -32 common dup __x64_sys_dup -33 common dup2 __x64_sys_dup2 -34 common pause __x64_sys_pause -35 common nanosleep __x64_sys_nanosleep -36 common getitimer __x64_sys_getitimer -37 common alarm __x64_sys_alarm -38 common setitimer __x64_sys_setitimer -39 common getpid __x64_sys_getpid -40 common sendfile __x64_sys_sendfile64 -41 common socket __x64_sys_socket -42 common connect __x64_sys_connect -43 common accept __x64_sys_accept -44 common sendto __x64_sys_sendto -45 64 recvfrom __x64_sys_recvfrom -46 64 sendmsg __x64_sys_sendmsg -47 64 recvmsg __x64_sys_recvmsg -48 common shutdown __x64_sys_shutdown -49 common bind __x64_sys_bind -50 common listen __x64_sys_listen -51 common getsockname __x64_sys_getsockname -52 common getpeername __x64_sys_getpeername -53 common socketpair __x64_sys_socketpair -54 64 setsockopt __x64_sys_setsockopt -55 64 getsockopt __x64_sys_getsockopt -56 common clone __x64_sys_clone/ptregs -57 common fork __x64_sys_fork/ptregs -58 common vfork __x64_sys_vfork/ptregs -59 64 execve __x64_sys_execve/ptregs -60 common exit __x64_sys_exit -61 common wait4 __x64_sys_wait4 -62 common kill __x64_sys_kill -63 common uname __x64_sys_newuname -64 common semget __x64_sys_semget -65 common semop __x64_sys_semop -66 common semctl __x64_sys_semctl -67 common shmdt __x64_sys_shmdt -68 common msgget __x64_sys_msgget -69 common msgsnd __x64_sys_msgsnd -70 common msgrcv __x64_sys_msgrcv -71 common msgctl __x64_sys_msgctl -72 common fcntl __x64_sys_fcntl -73 common flock __x64_sys_flock -74 common fsync __x64_sys_fsync -75 common fdatasync __x64_sys_fdatasync -76 common truncate __x64_sys_truncate -77 common ftruncate __x64_sys_ftruncate -78 common getdents __x64_sys_getdents -79 common getcwd __x64_sys_getcwd -80 common chdir __x64_sys_chdir -81 common fchdir __x64_sys_fchdir -82 common rename __x64_sys_rename -83 common mkdir __x64_sys_mkdir -84 common rmdir __x64_sys_rmdir -85 common creat __x64_sys_creat -86 common link __x64_sys_link -87 common unlink __x64_sys_unlink -88 common symlink __x64_sys_symlink -89 common readlink __x64_sys_readlink -90 common chmod __x64_sys_chmod -91 common fchmod __x64_sys_fchmod -92 common chown __x64_sys_chown -93 common fchown __x64_sys_fchown -94 common lchown __x64_sys_lchown -95 common umask __x64_sys_umask -96 common gettimeofday __x64_sys_gettimeofday -97 common getrlimit __x64_sys_getrlimit -98 common getrusage __x64_sys_getrusage -99 common sysinfo __x64_sys_sysinfo -100 common times __x64_sys_times -101 64 ptrace __x64_sys_ptrace -102 common getuid __x64_sys_getuid -103 common syslog __x64_sys_syslog -104 common getgid __x64_sys_getgid -105 common setuid __x64_sys_setuid -106 common setgid __x64_sys_setgid -107 common geteuid __x64_sys_geteuid -108 common getegid __x64_sys_getegid -109 common setpgid __x64_sys_setpgid -110 common getppid __x64_sys_getppid -111 common getpgrp __x64_sys_getpgrp -112 common setsid __x64_sys_setsid -113 common setreuid __x64_sys_setreuid -114 common setregid __x64_sys_setregid -115 common getgroups __x64_sys_getgroups -116 common setgroups __x64_sys_setgroups -117 common setresuid __x64_sys_setresuid -118 common getresuid __x64_sys_getresuid -119 common setresgid __x64_sys_setresgid -120 common getresgid __x64_sys_getresgid -121 common getpgid __x64_sys_getpgid -122 common setfsuid __x64_sys_setfsuid -123 common setfsgid __x64_sys_setfsgid -124 common getsid __x64_sys_getsid -125 common capget __x64_sys_capget -126 common capset __x64_sys_capset -127 64 rt_sigpending __x64_sys_rt_sigpending -128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait -129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo -130 common rt_sigsuspend __x64_sys_rt_sigsuspend -131 64 sigaltstack __x64_sys_sigaltstack -132 common utime __x64_sys_utime -133 common mknod __x64_sys_mknod +0 common read sys_read +1 common write sys_write +2 common open sys_open +3 common close sys_close +4 common stat sys_newstat +5 common fstat sys_newfstat +6 common lstat sys_newlstat +7 common poll sys_poll +8 common lseek sys_lseek +9 common mmap sys_mmap +10 common mprotect sys_mprotect +11 common munmap sys_munmap +12 common brk sys_brk +13 64 rt_sigaction sys_rt_sigaction +14 common rt_sigprocmask sys_rt_sigprocmask +15 64 rt_sigreturn sys_rt_sigreturn +16 64 ioctl sys_ioctl +17 common pread64 sys_pread64 +18 common pwrite64 sys_pwrite64 +19 64 readv sys_readv +20 64 writev sys_writev +21 common access sys_access +22 common pipe sys_pipe +23 common select sys_select +24 common sched_yield sys_sched_yield +25 common mremap sys_mremap +26 common msync sys_msync +27 common mincore sys_mincore +28 common madvise sys_madvise +29 common shmget sys_shmget +30 common shmat sys_shmat +31 common shmctl sys_shmctl +32 common dup sys_dup +33 common dup2 sys_dup2 +34 common pause sys_pause +35 common nanosleep sys_nanosleep +36 common getitimer sys_getitimer +37 common alarm sys_alarm +38 common setitimer sys_setitimer +39 common getpid sys_getpid +40 common sendfile sys_sendfile64 +41 common socket sys_socket +42 common connect sys_connect +43 common accept sys_accept +44 common sendto sys_sendto +45 64 recvfrom sys_recvfrom +46 64 sendmsg sys_sendmsg +47 64 recvmsg sys_recvmsg +48 common shutdown sys_shutdown +49 common bind sys_bind +50 common listen sys_listen +51 common getsockname sys_getsockname +52 common getpeername sys_getpeername +53 common socketpair sys_socketpair +54 64 setsockopt sys_setsockopt +55 64 getsockopt sys_getsockopt +56 common clone sys_clone +57 common fork sys_fork +58 common vfork sys_vfork +59 64 execve sys_execve +60 common exit sys_exit +61 common wait4 sys_wait4 +62 common kill sys_kill +63 common uname sys_newuname +64 common semget sys_semget +65 common semop sys_semop +66 common semctl sys_semctl +67 common shmdt sys_shmdt +68 common msgget sys_msgget +69 common msgsnd sys_msgsnd +70 common msgrcv sys_msgrcv +71 common msgctl sys_msgctl +72 common fcntl sys_fcntl +73 common flock sys_flock +74 common fsync sys_fsync +75 common fdatasync sys_fdatasync +76 common truncate sys_truncate +77 common ftruncate sys_ftruncate +78 common getdents sys_getdents +79 common getcwd sys_getcwd +80 common chdir sys_chdir +81 common fchdir sys_fchdir +82 common rename sys_rename +83 common mkdir sys_mkdir +84 common rmdir sys_rmdir +85 common creat sys_creat +86 common link sys_link +87 common unlink sys_unlink +88 common symlink sys_symlink +89 common readlink sys_readlink +90 common chmod sys_chmod +91 common fchmod sys_fchmod +92 common chown sys_chown +93 common fchown sys_fchown +94 common lchown sys_lchown +95 common umask sys_umask +96 common gettimeofday sys_gettimeofday +97 common getrlimit sys_getrlimit +98 common getrusage sys_getrusage +99 common sysinfo sys_sysinfo +100 common times sys_times +101 64 ptrace sys_ptrace +102 common getuid sys_getuid +103 common syslog sys_syslog +104 common getgid sys_getgid +105 common setuid sys_setuid +106 common setgid sys_setgid +107 common geteuid sys_geteuid +108 common getegid sys_getegid +109 common setpgid sys_setpgid +110 common getppid sys_getppid +111 common getpgrp sys_getpgrp +112 common setsid sys_setsid +113 common setreuid sys_setreuid +114 common setregid sys_setregid +115 common getgroups sys_getgroups +116 common setgroups sys_setgroups +117 common setresuid sys_setresuid +118 common getresuid sys_getresuid +119 common setresgid sys_setresgid +120 common getresgid sys_getresgid +121 common getpgid sys_getpgid +122 common setfsuid sys_setfsuid +123 common setfsgid sys_setfsgid +124 common getsid sys_getsid +125 common capget sys_capget +126 common capset sys_capset +127 64 rt_sigpending sys_rt_sigpending +128 64 rt_sigtimedwait sys_rt_sigtimedwait +129 64 rt_sigqueueinfo sys_rt_sigqueueinfo +130 common rt_sigsuspend sys_rt_sigsuspend +131 64 sigaltstack sys_sigaltstack +132 common utime sys_utime +133 common mknod sys_mknod 134 64 uselib -135 common personality __x64_sys_personality -136 common ustat __x64_sys_ustat -137 common statfs __x64_sys_statfs -138 common fstatfs __x64_sys_fstatfs -139 common sysfs __x64_sys_sysfs -140 common getpriority __x64_sys_getpriority -141 common setpriority __x64_sys_setpriority -142 common sched_setparam __x64_sys_sched_setparam -143 common sched_getparam __x64_sys_sched_getparam -144 common sched_setscheduler __x64_sys_sched_setscheduler -145 common sched_getscheduler __x64_sys_sched_getscheduler -146 common sched_get_priority_max __x64_sys_sched_get_priority_max -147 common sched_get_priority_min __x64_sys_sched_get_priority_min -148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval -149 common mlock __x64_sys_mlock -150 common munlock __x64_sys_munlock -151 common mlockall __x64_sys_mlockall -152 common munlockall __x64_sys_munlockall -153 common vhangup __x64_sys_vhangup -154 common modify_ldt __x64_sys_modify_ldt -155 common pivot_root __x64_sys_pivot_root -156 64 _sysctl __x64_sys_sysctl -157 common prctl __x64_sys_prctl -158 common arch_prctl __x64_sys_arch_prctl -159 common adjtimex __x64_sys_adjtimex -160 common setrlimit __x64_sys_setrlimit -161 common chroot __x64_sys_chroot -162 common sync __x64_sys_sync -163 common acct __x64_sys_acct -164 common settimeofday __x64_sys_settimeofday -165 common mount __x64_sys_mount -166 common umount2 __x64_sys_umount -167 common swapon __x64_sys_swapon -168 common swapoff __x64_sys_swapoff -169 common reboot __x64_sys_reboot -170 common sethostname __x64_sys_sethostname -171 common setdomainname __x64_sys_setdomainname -172 common iopl __x64_sys_iopl/ptregs -173 common ioperm __x64_sys_ioperm +135 common personality sys_personality +136 common ustat sys_ustat +137 common statfs sys_statfs +138 common fstatfs sys_fstatfs +139 common sysfs sys_sysfs +140 common getpriority sys_getpriority +141 common setpriority sys_setpriority +142 common sched_setparam sys_sched_setparam +143 common sched_getparam sys_sched_getparam +144 common sched_setscheduler sys_sched_setscheduler +145 common sched_getscheduler sys_sched_getscheduler +146 common sched_get_priority_max sys_sched_get_priority_max +147 common sched_get_priority_min sys_sched_get_priority_min +148 common sched_rr_get_interval sys_sched_rr_get_interval +149 common mlock sys_mlock +150 common munlock sys_munlock +151 common mlockall sys_mlockall +152 common munlockall sys_munlockall +153 common vhangup sys_vhangup +154 common modify_ldt sys_modify_ldt +155 common pivot_root sys_pivot_root +156 64 _sysctl sys_sysctl +157 common prctl sys_prctl +158 common arch_prctl sys_arch_prctl +159 common adjtimex sys_adjtimex +160 common setrlimit sys_setrlimit +161 common chroot sys_chroot +162 common sync sys_sync +163 common acct sys_acct +164 common settimeofday sys_settimeofday +165 common mount sys_mount +166 common umount2 sys_umount +167 common swapon sys_swapon +168 common swapoff sys_swapoff +169 common reboot sys_reboot +170 common sethostname sys_sethostname +171 common setdomainname sys_setdomainname +172 common iopl sys_iopl +173 common ioperm sys_ioperm 174 64 create_module -175 common init_module __x64_sys_init_module -176 common delete_module __x64_sys_delete_module +175 common init_module sys_init_module +176 common delete_module sys_delete_module 177 64 get_kernel_syms 178 64 query_module -179 common quotactl __x64_sys_quotactl +179 common quotactl sys_quotactl 180 64 nfsservctl 181 common getpmsg 182 common putpmsg 183 common afs_syscall 184 common tuxcall 185 common security -186 common gettid __x64_sys_gettid -187 common readahead __x64_sys_readahead -188 common setxattr __x64_sys_setxattr -189 common lsetxattr __x64_sys_lsetxattr -190 common fsetxattr __x64_sys_fsetxattr -191 common getxattr __x64_sys_getxattr -192 common lgetxattr __x64_sys_lgetxattr -193 common fgetxattr __x64_sys_fgetxattr -194 common listxattr __x64_sys_listxattr -195 common llistxattr __x64_sys_llistxattr -196 common flistxattr __x64_sys_flistxattr -197 common removexattr __x64_sys_removexattr -198 common lremovexattr __x64_sys_lremovexattr -199 common fremovexattr __x64_sys_fremovexattr -200 common tkill __x64_sys_tkill -201 common time __x64_sys_time -202 common futex __x64_sys_futex -203 common sched_setaffinity __x64_sys_sched_setaffinity -204 common sched_getaffinity __x64_sys_sched_getaffinity +186 common gettid sys_gettid +187 common readahead sys_readahead +188 common setxattr sys_setxattr +189 common lsetxattr sys_lsetxattr +190 common fsetxattr sys_fsetxattr +191 common getxattr sys_getxattr +192 common lgetxattr sys_lgetxattr +193 common fgetxattr sys_fgetxattr +194 common listxattr sys_listxattr +195 common llistxattr sys_llistxattr +196 common flistxattr sys_flistxattr +197 common removexattr sys_removexattr +198 common lremovexattr sys_lremovexattr +199 common fremovexattr sys_fremovexattr +200 common tkill sys_tkill +201 common time sys_time +202 common futex sys_futex +203 common sched_setaffinity sys_sched_setaffinity +204 common sched_getaffinity sys_sched_getaffinity 205 64 set_thread_area -206 64 io_setup __x64_sys_io_setup -207 common io_destroy __x64_sys_io_destroy -208 common io_getevents __x64_sys_io_getevents -209 64 io_submit __x64_sys_io_submit -210 common io_cancel __x64_sys_io_cancel +206 64 io_setup sys_io_setup +207 common io_destroy sys_io_destroy +208 common io_getevents sys_io_getevents +209 64 io_submit sys_io_submit +210 common io_cancel sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie __x64_sys_lookup_dcookie -213 common epoll_create __x64_sys_epoll_create +212 common lookup_dcookie sys_lookup_dcookie +213 common epoll_create sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -216 common remap_file_pages __x64_sys_remap_file_pages -217 common getdents64 __x64_sys_getdents64 -218 common set_tid_address __x64_sys_set_tid_address -219 common restart_syscall __x64_sys_restart_syscall -220 common semtimedop __x64_sys_semtimedop -221 common fadvise64 __x64_sys_fadvise64 -222 64 timer_create __x64_sys_timer_create -223 common timer_settime __x64_sys_timer_settime -224 common timer_gettime __x64_sys_timer_gettime -225 common timer_getoverrun __x64_sys_timer_getoverrun -226 common timer_delete __x64_sys_timer_delete -227 common clock_settime __x64_sys_clock_settime -228 common clock_gettime __x64_sys_clock_gettime -229 common clock_getres __x64_sys_clock_getres -230 common clock_nanosleep __x64_sys_clock_nanosleep -231 common exit_group __x64_sys_exit_group -232 common epoll_wait __x64_sys_epoll_wait -233 common epoll_ctl __x64_sys_epoll_ctl -234 common tgkill __x64_sys_tgkill -235 common utimes __x64_sys_utimes +216 common remap_file_pages sys_remap_file_pages +217 common getdents64 sys_getdents64 +218 common set_tid_address sys_set_tid_address +219 common restart_syscall sys_restart_syscall +220 common semtimedop sys_semtimedop +221 common fadvise64 sys_fadvise64 +222 64 timer_create sys_timer_create +223 common timer_settime sys_timer_settime +224 common timer_gettime sys_timer_gettime +225 common timer_getoverrun sys_timer_getoverrun +226 common timer_delete sys_timer_delete +227 common clock_settime sys_clock_settime +228 common clock_gettime sys_clock_gettime +229 common clock_getres sys_clock_getres +230 common clock_nanosleep sys_clock_nanosleep +231 common exit_group sys_exit_group +232 common epoll_wait sys_epoll_wait +233 common epoll_ctl sys_epoll_ctl +234 common tgkill sys_tgkill +235 common utimes sys_utimes 236 64 vserver -237 common mbind __x64_sys_mbind -238 common set_mempolicy __x64_sys_set_mempolicy -239 common get_mempolicy __x64_sys_get_mempolicy -240 common mq_open __x64_sys_mq_open -241 common mq_unlink __x64_sys_mq_unlink -242 common mq_timedsend __x64_sys_mq_timedsend -243 common mq_timedreceive __x64_sys_mq_timedreceive -244 64 mq_notify __x64_sys_mq_notify -245 common mq_getsetattr __x64_sys_mq_getsetattr -246 64 kexec_load __x64_sys_kexec_load -247 64 waitid __x64_sys_waitid -248 common add_key __x64_sys_add_key -249 common request_key __x64_sys_request_key -250 common keyctl __x64_sys_keyctl -251 common ioprio_set __x64_sys_ioprio_set -252 common ioprio_get __x64_sys_ioprio_get -253 common inotify_init __x64_sys_inotify_init -254 common inotify_add_watch __x64_sys_inotify_add_watch -255 common inotify_rm_watch __x64_sys_inotify_rm_watch -256 common migrate_pages __x64_sys_migrate_pages -257 common openat __x64_sys_openat -258 common mkdirat __x64_sys_mkdirat -259 common mknodat __x64_sys_mknodat -260 common fchownat __x64_sys_fchownat -261 common futimesat __x64_sys_futimesat -262 common newfstatat __x64_sys_newfstatat -263 common unlinkat __x64_sys_unlinkat -264 common renameat __x64_sys_renameat -265 common linkat __x64_sys_linkat -266 common symlinkat __x64_sys_symlinkat -267 common readlinkat __x64_sys_readlinkat -268 common fchmodat __x64_sys_fchmodat -269 common faccessat __x64_sys_faccessat -270 common pselect6 __x64_sys_pselect6 -271 common ppoll __x64_sys_ppoll -272 common unshare __x64_sys_unshare -273 64 set_robust_list __x64_sys_set_robust_list -274 64 get_robust_list __x64_sys_get_robust_list -275 common splice __x64_sys_splice -276 common tee __x64_sys_tee -277 common sync_file_range __x64_sys_sync_file_range -278 64 vmsplice __x64_sys_vmsplice -279 64 move_pages __x64_sys_move_pages -280 common utimensat __x64_sys_utimensat -281 common epoll_pwait __x64_sys_epoll_pwait -282 common signalfd __x64_sys_signalfd -283 common timerfd_create __x64_sys_timerfd_create -284 common eventfd __x64_sys_eventfd -285 common fallocate __x64_sys_fallocate -286 common timerfd_settime __x64_sys_timerfd_settime -287 common timerfd_gettime __x64_sys_timerfd_gettime -288 common accept4 __x64_sys_accept4 -289 common signalfd4 __x64_sys_signalfd4 -290 common eventfd2 __x64_sys_eventfd2 -291 common epoll_create1 __x64_sys_epoll_create1 -292 common dup3 __x64_sys_dup3 -293 common pipe2 __x64_sys_pipe2 -294 common inotify_init1 __x64_sys_inotify_init1 -295 64 preadv __x64_sys_preadv -296 64 pwritev __x64_sys_pwritev -297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo -298 common perf_event_open __x64_sys_perf_event_open -299 64 recvmmsg __x64_sys_recvmmsg -300 common fanotify_init __x64_sys_fanotify_init -301 common fanotify_mark __x64_sys_fanotify_mark -302 common prlimit64 __x64_sys_prlimit64 -303 common name_to_handle_at __x64_sys_name_to_handle_at -304 common open_by_handle_at __x64_sys_open_by_handle_at -305 common clock_adjtime __x64_sys_clock_adjtime -306 common syncfs __x64_sys_syncfs -307 64 sendmmsg __x64_sys_sendmmsg -308 common setns __x64_sys_setns -309 common getcpu __x64_sys_getcpu -310 64 process_vm_readv __x64_sys_process_vm_readv -311 64 process_vm_writev __x64_sys_process_vm_writev -312 common kcmp __x64_sys_kcmp -313 common finit_module __x64_sys_finit_module -314 common sched_setattr __x64_sys_sched_setattr -315 common sched_getattr __x64_sys_sched_getattr -316 common renameat2 __x64_sys_renameat2 -317 common seccomp __x64_sys_seccomp -318 common getrandom __x64_sys_getrandom -319 common memfd_create __x64_sys_memfd_create -320 common kexec_file_load __x64_sys_kexec_file_load -321 common bpf __x64_sys_bpf -322 64 execveat __x64_sys_execveat/ptregs -323 common userfaultfd __x64_sys_userfaultfd -324 common membarrier __x64_sys_membarrier -325 common mlock2 __x64_sys_mlock2 -326 common copy_file_range __x64_sys_copy_file_range -327 64 preadv2 __x64_sys_preadv2 -328 64 pwritev2 __x64_sys_pwritev2 -329 common pkey_mprotect __x64_sys_pkey_mprotect -330 common pkey_alloc __x64_sys_pkey_alloc -331 common pkey_free __x64_sys_pkey_free -332 common statx __x64_sys_statx -333 common io_pgetevents __x64_sys_io_pgetevents -334 common rseq __x64_sys_rseq +237 common mbind sys_mbind +238 common set_mempolicy sys_set_mempolicy +239 common get_mempolicy sys_get_mempolicy +240 common mq_open sys_mq_open +241 common mq_unlink sys_mq_unlink +242 common mq_timedsend sys_mq_timedsend +243 common mq_timedreceive sys_mq_timedreceive +244 64 mq_notify sys_mq_notify +245 common mq_getsetattr sys_mq_getsetattr +246 64 kexec_load sys_kexec_load +247 64 waitid sys_waitid +248 common add_key sys_add_key +249 common request_key sys_request_key +250 common keyctl sys_keyctl +251 common ioprio_set sys_ioprio_set +252 common ioprio_get sys_ioprio_get +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare +273 64 set_robust_list sys_set_robust_list +274 64 get_robust_list sys_get_robust_list +275 common splice sys_splice +276 common tee sys_tee +277 common sync_file_range sys_sync_file_range +278 64 vmsplice sys_vmsplice +279 64 move_pages sys_move_pages +280 common utimensat sys_utimensat +281 common epoll_pwait sys_epoll_pwait +282 common signalfd sys_signalfd +283 common timerfd_create sys_timerfd_create +284 common eventfd sys_eventfd +285 common fallocate sys_fallocate +286 common timerfd_settime sys_timerfd_settime +287 common timerfd_gettime sys_timerfd_gettime +288 common accept4 sys_accept4 +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 +295 64 preadv sys_preadv +296 64 pwritev sys_pwritev +297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +298 common perf_event_open sys_perf_event_open +299 64 recvmmsg sys_recvmmsg +300 common fanotify_init sys_fanotify_init +301 common fanotify_mark sys_fanotify_mark +302 common prlimit64 sys_prlimit64 +303 common name_to_handle_at sys_name_to_handle_at +304 common open_by_handle_at sys_open_by_handle_at +305 common clock_adjtime sys_clock_adjtime +306 common syncfs sys_syncfs +307 64 sendmmsg sys_sendmmsg +308 common setns sys_setns +309 common getcpu sys_getcpu +310 64 process_vm_readv sys_process_vm_readv +311 64 process_vm_writev sys_process_vm_writev +312 common kcmp sys_kcmp +313 common finit_module sys_finit_module +314 common sched_setattr sys_sched_setattr +315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp +318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load +321 common bpf sys_bpf +322 64 execveat sys_execveat +323 common userfaultfd sys_userfaultfd +324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 +326 common copy_file_range sys_copy_file_range +327 64 preadv2 sys_preadv2 +328 64 pwritev2 sys_pwritev2 +329 common pkey_mprotect sys_pkey_mprotect +330 common pkey_alloc sys_pkey_alloc +331 common pkey_free sys_pkey_free +332 common statx sys_statx +333 common io_pgetevents sys_io_pgetevents +334 common rseq sys_rseq # don't use numbers 387 through 423, add new calls after the last # 'common' entry -424 common pidfd_send_signal __x64_sys_pidfd_send_signal -425 common io_uring_setup __x64_sys_io_uring_setup -426 common io_uring_enter __x64_sys_io_uring_enter -427 common io_uring_register __x64_sys_io_uring_register -428 common open_tree __x64_sys_open_tree -429 common move_mount __x64_sys_move_mount -430 common fsopen __x64_sys_fsopen -431 common fsconfig __x64_sys_fsconfig -432 common fsmount __x64_sys_fsmount -433 common fspick __x64_sys_fspick -434 common pidfd_open __x64_sys_pidfd_open -435 common clone3 __x64_sys_clone3/ptregs -437 common openat2 __x64_sys_openat2 -438 common pidfd_getfd __x64_sys_pidfd_getfd +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd # # x32-specific system call numbers start at 512 to avoid cache impact @@ -366,39 +366,39 @@ # on-the-fly for compat_sys_*() compatibility system calls if X86_X32 # is defined. # -512 x32 rt_sigaction __x32_compat_sys_rt_sigaction -513 x32 rt_sigreturn sys32_x32_rt_sigreturn -514 x32 ioctl __x32_compat_sys_ioctl -515 x32 readv __x32_compat_sys_readv -516 x32 writev __x32_compat_sys_writev -517 x32 recvfrom __x32_compat_sys_recvfrom -518 x32 sendmsg __x32_compat_sys_sendmsg -519 x32 recvmsg __x32_compat_sys_recvmsg -520 x32 execve __x32_compat_sys_execve/ptregs -521 x32 ptrace __x32_compat_sys_ptrace -522 x32 rt_sigpending __x32_compat_sys_rt_sigpending -523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64 -524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo -525 x32 sigaltstack __x32_compat_sys_sigaltstack -526 x32 timer_create __x32_compat_sys_timer_create -527 x32 mq_notify __x32_compat_sys_mq_notify -528 x32 kexec_load __x32_compat_sys_kexec_load -529 x32 waitid __x32_compat_sys_waitid -530 x32 set_robust_list __x32_compat_sys_set_robust_list -531 x32 get_robust_list __x32_compat_sys_get_robust_list -532 x32 vmsplice __x32_compat_sys_vmsplice -533 x32 move_pages __x32_compat_sys_move_pages -534 x32 preadv __x32_compat_sys_preadv64 -535 x32 pwritev __x32_compat_sys_pwritev64 -536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64 -538 x32 sendmmsg __x32_compat_sys_sendmmsg -539 x32 process_vm_readv __x32_compat_sys_process_vm_readv -540 x32 process_vm_writev __x32_compat_sys_process_vm_writev -541 x32 setsockopt __x32_compat_sys_setsockopt -542 x32 getsockopt __x32_compat_sys_getsockopt -543 x32 io_setup __x32_compat_sys_io_setup -544 x32 io_submit __x32_compat_sys_io_submit -545 x32 execveat __x32_compat_sys_execveat/ptregs -546 x32 preadv2 __x32_compat_sys_preadv64v2 -547 x32 pwritev2 __x32_compat_sys_pwritev64v2 +512 x32 rt_sigaction compat_sys_rt_sigaction +513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn +514 x32 ioctl compat_sys_ioctl +515 x32 readv compat_sys_readv +516 x32 writev compat_sys_writev +517 x32 recvfrom compat_sys_recvfrom +518 x32 sendmsg compat_sys_sendmsg +519 x32 recvmsg compat_sys_recvmsg +520 x32 execve compat_sys_execve +521 x32 ptrace compat_sys_ptrace +522 x32 rt_sigpending compat_sys_rt_sigpending +523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 +524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo +525 x32 sigaltstack compat_sys_sigaltstack +526 x32 timer_create compat_sys_timer_create +527 x32 mq_notify compat_sys_mq_notify +528 x32 kexec_load compat_sys_kexec_load +529 x32 waitid compat_sys_waitid +530 x32 set_robust_list compat_sys_set_robust_list +531 x32 get_robust_list compat_sys_get_robust_list +532 x32 vmsplice compat_sys_vmsplice +533 x32 move_pages compat_sys_move_pages +534 x32 preadv compat_sys_preadv64 +535 x32 pwritev compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg compat_sys_recvmmsg_time64 +538 x32 sendmmsg compat_sys_sendmmsg +539 x32 process_vm_readv compat_sys_process_vm_readv +540 x32 process_vm_writev compat_sys_process_vm_writev +541 x32 setsockopt compat_sys_setsockopt +542 x32 getsockopt compat_sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit +545 x32 execveat compat_sys_execveat +546 x32 preadv2 compat_sys_preadv64v2 +547 x32 pwritev2 compat_sys_pwritev64v2 diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index bfb21d049e6c..cf147db4e5ca 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -22,7 +22,9 @@ include/uapi/linux/usbdevice_fs.h include/uapi/linux/vhost.h include/uapi/sound/asound.h include/linux/bits.h +include/vdso/bits.h include/linux/const.h +include/vdso/const.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h @@ -115,6 +117,7 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' +check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c index 062ca849c8fd..f4db894e0af6 100644 --- a/tools/perf/trace/beauty/clone.c +++ b/tools/perf/trace/beauty/clone.c @@ -46,6 +46,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size, P_FLAG(NEWNET); P_FLAG(IO); P_FLAG(CLEAR_SIGHAND); + P_FLAG(INTO_CGROUP); #undef P_FLAG if (flags) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 9fa771a90d79..862c8331dded 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -69,6 +69,7 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, P_MREMAP_FLAG(MAYMOVE); P_MREMAP_FLAG(FIXED); + P_MREMAP_FLAG(DONTUNMAP); #undef P_MREMAP_FLAG if (flags) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 347b2c0789e4..c5e3e9a68162 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -21,6 +21,8 @@ if cc_is_clang: vars[var] = sub("-fstack-clash-protection", "", vars[var]) if not clang_has_option("-fstack-protector-strong"): vars[var] = sub("-fstack-protector-strong", "", vars[var]) + if not clang_has_option("-fno-semantic-interposition"): + vars[var] = sub("-fno-semantic-interposition", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 0fd713d3674f..03ecb8cd0eec 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -803,8 +803,11 @@ static void generic_metric(struct perf_stat_config *config, out->force_header ? (metric_name ? metric_name : name) : "", 0); } - } else - print_metric(config, ctxp, NULL, NULL, "", 0); + } else { + print_metric(config, ctxp, NULL, NULL, + out->force_header ? + (metric_name ? metric_name : name) : "", 0); + } for (i = 1; i < pctx.num_ids; i++) zfree(&pctx.ids[i].name); |