diff options
Diffstat (limited to 'drivers/thermal/intel')
-rw-r--r-- | drivers/thermal/intel/Kconfig | 14 | ||||
-rw-r--r-- | drivers/thermal/intel/Makefile | 1 | ||||
-rw-r--r-- | drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c | 23 | ||||
-rw-r--r-- | drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 153 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_hfi.c | 569 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_hfi.h | 17 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_powerclamp.c | 9 | ||||
-rw-r--r-- | drivers/thermal/intel/therm_throt.c | 22 |
8 files changed, 736 insertions, 72 deletions
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index c83ea5d04a1d..f0c845679250 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -99,3 +99,17 @@ config INTEL_MENLOW Intel Menlow platform. If unsure, say N. + +config INTEL_HFI_THERMAL + bool "Intel Hardware Feedback Interface" + depends on NET + depends on CPU_SUP_INTEL + depends on X86_THERMAL_VECTOR + select THERMAL_NETLINK + help + Select this option to enable the Hardware Feedback Interface. If + selected, hardware provides guidance to the operating system on + the performance and energy efficiency capabilities of each CPU. + These capabilities may change as a result of changes in the operating + conditions of the system such power and thermal limits. If selected, + the kernel relays updates in CPUs' capabilities to userspace. diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile index 960b56268b4a..9a8d8054f316 100644 --- a/drivers/thermal/intel/Makefile +++ b/drivers/thermal/intel/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o +obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index e90690a234c4..01b80331eab6 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, int i; int nr_bad_entries = 0; struct trt *trts; - struct acpi_device *adev; union acpi_object *p; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer element = { 0, NULL }; @@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, 
if (!create_dev) continue; - result = acpi_bus_get_device(trt->source, &adev); - if (result) + if (!acpi_fetch_acpi_dev(trt->source)) pr_warn("Failed to get source ACPI device\n"); - result = acpi_bus_get_device(trt->target, &adev); - if (result) + if (!acpi_fetch_acpi_dev(trt->target)) pr_warn("Failed to get target ACPI device\n"); } @@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, int i; int nr_bad_entries = 0; struct art *arts; - struct acpi_device *adev; union acpi_object *p; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer element = { 0, NULL }; @@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, if (!create_dev) continue; - if (art->source) { - result = acpi_bus_get_device(art->source, &adev); - if (result) - pr_warn("Failed to get source ACPI device\n"); - } - if (art->target) { - result = acpi_bus_get_device(art->target, &adev); - if (result) - pr_warn("Failed to get target ACPI device\n"); - } + if (!acpi_fetch_acpi_dev(art->source)) + pr_warn("Failed to get source ACPI device\n"); + + if (!acpi_fetch_acpi_dev(art->target)) + pr_warn("Failed to get target ACPI device\n"); } *artp = arts; diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4f478812cb51..4954800b9850 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -17,8 +17,8 @@ #define INT3400_KEEP_ALIVE 0xA0 enum int3400_thermal_uuid { + INT3400_THERMAL_ACTIVE = 0, INT3400_THERMAL_PASSIVE_1, - INT3400_THERMAL_ACTIVE, INT3400_THERMAL_CRITICAL, INT3400_THERMAL_ADAPTIVE_PERFORMANCE, INT3400_THERMAL_EMERGENCY_CALL_MODE, @@ -31,8 +31,8 @@ enum int3400_thermal_uuid { }; static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { - "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", + 
"42A441D6-AE6A-462b-A84B-4A8CE79027D3", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D", "5349962F-71E6-431D-9AE8-0A635B710AEE", @@ -53,12 +53,13 @@ struct int3400_thermal_priv { struct art *arts; int trt_count; struct trt *trts; - u8 uuid_bitmap; + u32 uuid_bitmap; int rel_misc_dev_res; int current_uuid_index; char *data_vault; int odvp_count; int *odvp; + u32 os_uuid_mask; struct odvp_attr *odvp_attrs; }; @@ -142,12 +143,55 @@ static ssize_t current_uuid_show(struct device *dev, struct device_attribute *devattr, char *buf) { struct int3400_thermal_priv *priv = dev_get_drvdata(dev); + int i, length = 0; - if (priv->current_uuid_index == -1) - return sprintf(buf, "INVALID\n"); + if (priv->current_uuid_index > 0) + return sprintf(buf, "%s\n", + int3400_thermal_uuids[priv->current_uuid_index]); - return sprintf(buf, "%s\n", - int3400_thermal_uuids[priv->current_uuid_index]); + for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) { + if (priv->os_uuid_mask & BIT(i)) + length += scnprintf(&buf[length], + PAGE_SIZE - length, + "%s\n", + int3400_thermal_uuids[i]); + } + + if (length) + return length; + + return sprintf(buf, "INVALID\n"); +} + +static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable) +{ + u32 ret, buf[2]; + acpi_status status; + int result = 0; + struct acpi_osc_context context = { + .uuid_str = NULL, + .rev = 1, + .cap.length = 8, + }; + + context.uuid_str = uuid_str; + + buf[OSC_QUERY_DWORD] = 0; + buf[OSC_SUPPORT_DWORD] = *enable; + + context.cap.pointer = buf; + + status = acpi_run_osc(handle, &context); + if (ACPI_SUCCESS(status)) { + ret = *((u32 *)(context.ret.pointer + 4)); + if (ret != *enable) + result = -EPERM; + } else + result = -EPERM; + + kfree(context.ret.pointer); + + return result; } static ssize_t current_uuid_store(struct device *dev, @@ -164,16 +208,47 @@ static ssize_t current_uuid_store(struct device *dev, * If we have a list of supported UUIDs, make sure * this one is supported. 
*/ - if (priv->uuid_bitmap && - !(priv->uuid_bitmap & (1 << i))) + if (priv->uuid_bitmap & BIT(i)) { + priv->current_uuid_index = i; + return count; + } + + /* + * There is support of only 3 policies via the new + * _OSC to inform OS capability: + * INT3400_THERMAL_ACTIVE + * INT3400_THERMAL_PASSIVE_1 + * INT3400_THERMAL_CRITICAL + */ + + if (i > INT3400_THERMAL_CRITICAL) return -EINVAL; - priv->current_uuid_index = i; - return count; + priv->os_uuid_mask |= BIT(i); + + break; } } - return -EINVAL; + if (priv->os_uuid_mask) { + int cap, ret; + + /* + * Capability bits: + * Bit 0: set to 1 to indicate DPTF is active + * Bit 1: set to 1 to indicate active cooling is supported by user space daemon + * Bit 2: set to 1 to indicate passive cooling is supported by user space daemon + * Bit 3: set to 1 to indicate critical trip is handled by user space daemon + */ + cap = ((priv->os_uuid_mask << 1) | 0x01); + ret = int3400_thermal_run_osc(priv->adev->handle, + "b23ba85d-c8b7-3542-88de-8de2ffcfd698", + &cap); + if (ret) + return ret; + } + + return count; } static DEVICE_ATTR_RW(current_uuid); @@ -236,41 +311,6 @@ end: return result; } -static int int3400_thermal_run_osc(acpi_handle handle, - enum int3400_thermal_uuid uuid, bool enable) -{ - u32 ret, buf[2]; - acpi_status status; - int result = 0; - struct acpi_osc_context context = { - .uuid_str = NULL, - .rev = 1, - .cap.length = 8, - }; - - if (uuid < 0 || uuid >= INT3400_THERMAL_MAXIMUM_UUID) - return -EINVAL; - - context.uuid_str = int3400_thermal_uuids[uuid]; - - buf[OSC_QUERY_DWORD] = 0; - buf[OSC_SUPPORT_DWORD] = enable; - - context.cap.pointer = buf; - - status = acpi_run_osc(handle, &context); - if (ACPI_SUCCESS(status)) { - ret = *((u32 *)(context.ret.pointer + 4)); - if (ret != enable) - result = -EPERM; - } else - result = -EPERM; - - kfree(context.ret.pointer); - - return result; -} - static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -426,10 +466,18 @@ static int 
int3400_thermal_change_mode(struct thermal_zone_device *thermal, if (!priv) return -EINVAL; - if (mode != thermal->mode) + if (mode != thermal->mode) { + int enabled; + + if (priv->current_uuid_index < 0 || + priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID) + return -EINVAL; + + enabled = (mode == THERMAL_DEVICE_ENABLED); result = int3400_thermal_run_osc(priv->adev->handle, - priv->current_uuid_index, - mode == THERMAL_DEVICE_ENABLED); + int3400_thermal_uuids[priv->current_uuid_index], + &enabled); + } evaluate_odvp(priv); @@ -468,6 +516,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); + if (!priv->data_vault) { + kfree(buffer.pointer); + return; + } + bin_attr_data_vault.private = priv->data_vault; bin_attr_data_vault.size = obj->package.elements[0].buffer.length; kfree(buffer.pointer); diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c new file mode 100644 index 000000000000..730fd121df6e --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hardware Feedback Interface Driver + * + * Copyright (c) 2021, Intel Corporation. + * + * Authors: Aubrey Li <aubrey.li@linux.intel.com> + * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> + * + * + * The Hardware Feedback Interface provides a performance and energy efficiency + * capability information for each CPU in the system. Depending on the processor + * model, hardware may periodically update these capabilities as a result of + * changes in the operating conditions (e.g., power limits or thermal + * constraints). On other processor models, there is a single HFI update + * at boot. + * + * This file provides functionality to process HFI updates and relay these + * updates to userspace. 
+ */ + +#define pr_fmt(fmt) "intel-hfi: " fmt + +#include <linux/bitops.h> +#include <linux/cpufeature.h> +#include <linux/cpumask.h> +#include <linux/gfp.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/mutex.h> +#include <linux/percpu-defs.h> +#include <linux/printk.h> +#include <linux/processor.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/topology.h> +#include <linux/workqueue.h> + +#include <asm/msr.h> + +#include "../thermal_core.h" +#include "intel_hfi.h" + +#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \ + BIT(9) | BIT(11) | BIT(26)) + +/* Hardware Feedback Interface MSR configuration bits */ +#define HW_FEEDBACK_PTR_VALID_BIT BIT(0) +#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) + +/* CPUID detection and enumeration definitions for HFI */ + +#define CPUID_HFI_LEAF 6 + +union hfi_capabilities { + struct { + u8 performance:1; + u8 energy_efficiency:1; + u8 __reserved:6; + } split; + u8 bits; +}; + +union cpuid6_edx { + struct { + union hfi_capabilities capabilities; + u32 table_pages:4; + u32 __reserved:4; + s32 index:16; + } split; + u32 full; +}; + +/** + * struct hfi_cpu_data - HFI capabilities per CPU + * @perf_cap: Performance capability + * @ee_cap: Energy efficiency capability + * + * Capabilities of a logical processor in the HFI table. These capabilities are + * unitless. + */ +struct hfi_cpu_data { + u8 perf_cap; + u8 ee_cap; +} __packed; + +/** + * struct hfi_hdr - Header of the HFI table + * @perf_updated: Hardware updated performance capabilities + * @ee_updated: Hardware updated energy efficiency capabilities + * + * Properties of the data in an HFI table. 
+ */ +struct hfi_hdr { + u8 perf_updated; + u8 ee_updated; +} __packed; + +/** + * struct hfi_instance - Representation of an HFI instance (i.e., a table) + * @local_table: Base of the local copy of the HFI table + * @timestamp: Timestamp of the last update of the local table. + * Located at the base of the local table. + * @hdr: Base address of the header of the local table + * @data: Base address of the data of the local table + * @cpus: CPUs represented in this HFI table instance + * @hw_table: Pointer to the HFI table of this instance + * @update_work: Delayed work to process HFI updates + * @table_lock: Lock to protect accesses to the table of this instance + * @event_lock: Lock to process HFI interrupts + * + * A set of parameters to parse and navigate a specific HFI table. + */ +struct hfi_instance { + union { + void *local_table; + u64 *timestamp; + }; + void *hdr; + void *data; + cpumask_var_t cpus; + void *hw_table; + struct delayed_work update_work; + raw_spinlock_t table_lock; + raw_spinlock_t event_lock; +}; + +/** + * struct hfi_features - Supported HFI features + * @nr_table_pages: Size of the HFI table in 4KB pages + * @cpu_stride: Stride size to locate the capability data of a logical + * processor within the table (i.e., row stride) + * @hdr_size: Size of the table header + * + * Parameters and supported features that are common to all HFI instances + */ +struct hfi_features { + unsigned int nr_table_pages; + unsigned int cpu_stride; + unsigned int hdr_size; +}; + +/** + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data + * @index: Row of this CPU in its HFI table + * @hfi_instance: Attributes of the HFI table to which this CPU belongs + * + * Parameters to link a logical processor to an HFI table and a row within it. 
+ */ +struct hfi_cpu_info { + s16 index; + struct hfi_instance *hfi_instance; +}; + +static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; + +static int max_hfi_instances; +static struct hfi_instance *hfi_instances; + +static struct hfi_features hfi_features; +static DEFINE_MUTEX(hfi_instance_lock); + +static struct workqueue_struct *hfi_updates_wq; +#define HFI_UPDATE_INTERVAL HZ +#define HFI_MAX_THERM_NOTIFY_COUNT 16 + +static void get_hfi_caps(struct hfi_instance *hfi_instance, + struct thermal_genl_cpu_caps *cpu_caps) +{ + int cpu, i = 0; + + raw_spin_lock_irq(&hfi_instance->table_lock); + for_each_cpu(cpu, hfi_instance->cpus) { + struct hfi_cpu_data *caps; + s16 index; + + index = per_cpu(hfi_cpu_info, cpu).index; + caps = hfi_instance->data + index * hfi_features.cpu_stride; + cpu_caps[i].cpu = cpu; + + /* + * Scale performance and energy efficiency to + * the [0, 1023] interval that thermal netlink uses. + */ + cpu_caps[i].performance = caps->perf_cap << 2; + cpu_caps[i].efficiency = caps->ee_cap << 2; + + ++i; + } + raw_spin_unlock_irq(&hfi_instance->table_lock); +} + +/* + * Call update_capabilities() when there are changes in the HFI table. + */ +static void update_capabilities(struct hfi_instance *hfi_instance) +{ + struct thermal_genl_cpu_caps *cpu_caps; + int i = 0, cpu_count; + + /* CPUs may come online/offline while processing an HFI update. */ + mutex_lock(&hfi_instance_lock); + + cpu_count = cpumask_weight(hfi_instance->cpus); + + /* No CPUs to report in this hfi_instance. */ + if (!cpu_count) + goto out; + + cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); + if (!cpu_caps) + goto out; + + get_hfi_caps(hfi_instance, cpu_caps); + + if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) + goto last_cmd; + + /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. 
*/ + for (i = 0; + (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; + i += HFI_MAX_THERM_NOTIFY_COUNT) + thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, + &cpu_caps[i]); + + cpu_count = cpu_count - i; + +last_cmd: + /* Process the remaining capabilities if any. */ + if (cpu_count) + thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); + + kfree(cpu_caps); +out: + mutex_unlock(&hfi_instance_lock); +} + +static void hfi_update_work_fn(struct work_struct *work) +{ + struct hfi_instance *hfi_instance; + + hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, + update_work); + if (!hfi_instance) + return; + + update_capabilities(hfi_instance); +} + +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) +{ + struct hfi_instance *hfi_instance; + int cpu = smp_processor_id(); + struct hfi_cpu_info *info; + u64 new_timestamp; + + if (!pkg_therm_status_msr_val) + return; + + info = &per_cpu(hfi_cpu_info, cpu); + if (!info) + return; + + /* + * A CPU is linked to its HFI instance before the thermal vector in the + * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL + * when receiving an HFI event. + */ + hfi_instance = info->hfi_instance; + if (unlikely(!hfi_instance)) { + pr_debug("Received event on CPU %d but instance was null", cpu); + return; + } + + /* + * On most systems, all CPUs in the package receive a package-level + * thermal interrupt when there is an HFI update. It is sufficient to + * let a single CPU to acknowledge the update and queue work to + * process it. The remaining CPUs can resume their work. + */ + if (!raw_spin_trylock(&hfi_instance->event_lock)) + return; + + /* Skip duplicated updates. */ + new_timestamp = *(u64 *)hfi_instance->hw_table; + if (*hfi_instance->timestamp == new_timestamp) { + raw_spin_unlock(&hfi_instance->event_lock); + return; + } + + raw_spin_lock(&hfi_instance->table_lock); + + /* + * Copy the updated table into our local copy. This includes the new + * timestamp. 
+ */ + memcpy(hfi_instance->local_table, hfi_instance->hw_table, + hfi_features.nr_table_pages << PAGE_SHIFT); + + raw_spin_unlock(&hfi_instance->table_lock); + raw_spin_unlock(&hfi_instance->event_lock); + + /* + * Let hardware know that we are done reading the HFI table and it is + * free to update it again. + */ + pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK & + ~PACKAGE_THERM_STATUS_HFI_UPDATED; + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val); + + queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, + HFI_UPDATE_INTERVAL); +} + +static void init_hfi_cpu_index(struct hfi_cpu_info *info) +{ + union cpuid6_edx edx; + + /* Do not re-read @cpu's index if it has already been initialized. */ + if (info->index > -1) + return; + + edx.full = cpuid_edx(CPUID_HFI_LEAF); + info->index = edx.split.index; +} + +/* + * The format of the HFI table depends on the number of capabilities that the + * hardware supports. Keep a data structure to navigate the table. + */ +static void init_hfi_instance(struct hfi_instance *hfi_instance) +{ + /* The HFI header is below the time-stamp. */ + hfi_instance->hdr = hfi_instance->local_table + + sizeof(*hfi_instance->timestamp); + + /* The HFI data starts below the header. */ + hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; +} + +/** + * intel_hfi_online() - Enable HFI on @cpu + * @cpu: CPU in which the HFI will be enabled + * + * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package + * level. The first CPU in the die/package to come online does the full HFI + * initialization. Subsequent CPUs will just link themselves to the HFI + * instance of their die/package. + * + * This function is called before enabling the thermal vector in the local APIC + * in order to ensure that @cpu has an associated HFI instance when it receives + * an HFI event. 
+ */ +void intel_hfi_online(unsigned int cpu) +{ + struct hfi_instance *hfi_instance; + struct hfi_cpu_info *info; + phys_addr_t hw_table_pa; + u64 msr_val; + u16 die_id; + + /* Nothing to do if hfi_instances are missing. */ + if (!hfi_instances) + return; + + /* + * Link @cpu to the HFI instance of its package/die. It does not + * matter whether the instance has been initialized. + */ + info = &per_cpu(hfi_cpu_info, cpu); + die_id = topology_logical_die_id(cpu); + hfi_instance = info->hfi_instance; + if (!hfi_instance) { + if (die_id < 0 || die_id >= max_hfi_instances) + return; + + hfi_instance = &hfi_instances[die_id]; + info->hfi_instance = hfi_instance; + } + + init_hfi_cpu_index(info); + + /* + * Now check if the HFI instance of the package/die of @cpu has been + * initialized (by checking its header). In such case, all we have to + * do is to add @cpu to this instance's cpumask. + */ + mutex_lock(&hfi_instance_lock); + if (hfi_instance->hdr) { + cpumask_set_cpu(cpu, hfi_instance->cpus); + goto unlock; + } + + /* + * Hardware is programmed with the physical address of the first page + * frame of the table. Hence, the allocated memory must be page-aligned. + */ + hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, + GFP_KERNEL | __GFP_ZERO); + if (!hfi_instance->hw_table) + goto unlock; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + + /* + * Allocate memory to keep a local copy of the table that + * hardware generates. + */ + hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, + GFP_KERNEL); + if (!hfi_instance->local_table) + goto free_hw_table; + + /* + * Program the address of the feedback table of this die/package. On + * some processors, hardware remembers the old address of the HFI table + * even after having been reprogrammed and re-enabled. Thus, do not free + * the pages allocated for the table or reprogram the hardware with a + * new base address. Namely, program the hardware only once. 
+ */ + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); + + init_hfi_instance(hfi_instance); + + INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); + raw_spin_lock_init(&hfi_instance->table_lock); + raw_spin_lock_init(&hfi_instance->event_lock); + + cpumask_set_cpu(cpu, hfi_instance->cpus); + + /* + * Enable the hardware feedback interface and never disable it. See + * comment on programming the address of the table. + */ + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + +unlock: + mutex_unlock(&hfi_instance_lock); + return; + +free_hw_table: + free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); + goto unlock; +} + +/** + * intel_hfi_offline() - Disable HFI on @cpu + * @cpu: CPU in which the HFI will be disabled + * + * Remove @cpu from those covered by its HFI instance. + * + * On some processors, hardware remembers previous programming settings even + * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the + * die/package of @cpu are offline. See note in intel_hfi_online(). + */ +void intel_hfi_offline(unsigned int cpu) +{ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); + struct hfi_instance *hfi_instance; + + /* + * Check if @cpu has an associated, initialized (i.e., with a non-NULL + * header) HFI instance. Also, HFI instances are only initialized if X86_FEATURE_HFI + * is present. + */ + hfi_instance = info->hfi_instance; + if (!hfi_instance) + return; + + if (!hfi_instance->hdr) + return; + + mutex_lock(&hfi_instance_lock); + cpumask_clear_cpu(cpu, hfi_instance->cpus); + mutex_unlock(&hfi_instance_lock); +} + +static __init int hfi_parse_features(void) +{ + unsigned int nr_capabilities; + union cpuid6_edx edx; + + if (!boot_cpu_has(X86_FEATURE_HFI)) + return -ENODEV; + + /* + * If we are here we know that CPUID_HFI_LEAF exists. 
Parse the + * supported capabilities and the size of the HFI table. + */ + edx.full = cpuid_edx(CPUID_HFI_LEAF); + + if (!edx.split.capabilities.split.performance) { + pr_debug("Performance reporting not supported! Not using HFI\n"); + return -ENODEV; + } + + /* + * The number of supported capabilities determines the number of + * columns in the HFI table. Exclude the reserved bits. + */ + edx.split.capabilities.split.__reserved = 0; + nr_capabilities = hweight8(edx.split.capabilities.bits); + + /* The number of 4KB pages required by the table */ + hfi_features.nr_table_pages = edx.split.table_pages + 1; + + /* + * The header contains change indications for each supported feature. + * The size of the table header is rounded up to be a multiple of 8 + * bytes. + */ + hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + /* + * Data of each logical processor is also rounded up to be a multiple + * of 8 bytes. + */ + hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + return 0; +} + +void __init intel_hfi_init(void) +{ + struct hfi_instance *hfi_instance; + int i, j; + + if (hfi_parse_features()) + return; + + /* There is one HFI instance per die/package. */ + max_hfi_instances = topology_max_packages() * + topology_max_die_per_package(); + + /* + * This allocation may fail. CPU hotplug callbacks must check + * for a null pointer. 
+ */ + hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), + GFP_KERNEL); + if (!hfi_instances) + return; + + for (i = 0; i < max_hfi_instances; i++) { + hfi_instance = &hfi_instances[i]; + if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) + goto err_nomem; + } + + hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); + if (!hfi_updates_wq) + goto err_nomem; + + return; + +err_nomem: + for (j = 0; j < i; ++j) { + hfi_instance = &hfi_instances[j]; + free_cpumask_var(hfi_instance->cpus); + } + + kfree(hfi_instances); + hfi_instances = NULL; +} diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h new file mode 100644 index 000000000000..325aa78b745c --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_HFI_H +#define _INTEL_HFI_H + +#if defined(CONFIG_INTEL_HFI_THERMAL) +void __init intel_hfi_init(void); +void intel_hfi_online(unsigned int cpu); +void intel_hfi_offline(unsigned int cpu); +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val); +#else +static inline void intel_hfi_init(void) { } +static inline void intel_hfi_online(unsigned int cpu) { } +static inline void intel_hfi_offline(unsigned int cpu) { } +static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { } +#endif /* CONFIG_INTEL_HFI_THERMAL */ + +#endif /* _INTEL_HFI_H */ diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 14256421d98c..c841ab37e7c6 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -556,12 +556,9 @@ static void end_power_clamp(void) * stop faster. 
*/ clamping = false; - if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { - for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { - pr_debug("clamping worker for cpu %d alive, destroy\n", - i); - stop_power_clamp_worker(i); - } + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping worker for cpu %d alive, destroy\n", i); + stop_power_clamp_worker(i); } } diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index dab7e8fb1059..8352083b87c7 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -32,6 +32,7 @@ #include <asm/irq.h> #include <asm/msr.h> +#include "intel_hfi.h" #include "thermal_interrupt.h" /* How long to wait between reporting thermal events */ @@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu) INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + /* + * The first CPU coming online will enable the HFI. Usually this causes + * hardware to issue an HFI thermal interrupt. Such interrupt will reach + * the CPU once we enable the thermal vector in the local APIC. + */ + intel_hfi_online(cpu); + /* Unmask the thermal vector after the above workqueues are initialized. 
*/ l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); @@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); + intel_hfi_offline(cpu); + cancel_delayed_work_sync(&state->package_throttle.therm_work); cancel_delayed_work_sync(&state->core_throttle.therm_work); @@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void) if (!atomic_read(&therm_throt_en)) return 0; + intel_hfi_init(); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online", thermal_throttle_online, thermal_throttle_offline); @@ -608,6 +620,10 @@ void intel_thermal_interrupt(void) PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, PACKAGE_LEVEL); + + if (this_cpu_has(X86_FEATURE_HFI)) + intel_hfi_process_event(msr_val & + PACKAGE_THERM_STATUS_HFI_UPDATED); } } @@ -717,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l | (PACKAGE_THERM_INT_LOW_ENABLE | PACKAGE_THERM_INT_HIGH_ENABLE), h); + + if (cpu_has(c, X86_FEATURE_HFI)) { + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | PACKAGE_THERM_INT_HFI_ENABLE, h); + } } rdmsr(MSR_IA32_MISC_ENABLE, l, h); |