summaryrefslogtreecommitdiff
path: root/drivers/cpufreq/powernv-cpufreq.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-09-02 05:45:46 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-09-02 05:45:46 +0300
commit ae982073095a44f004d7ffb9f271077abef9dbcf (patch)
tree 26dfda416542c9dc60ab24029c16caecb964d627 /drivers/cpufreq/powernv-cpufreq.c
parent f1a3c0b933e7ff856223d6fcd7456d403e54e4e5 (diff)
parent e625ccec1fa6c24620f38fd72d5b2fd62230ad2b (diff)
download linux-ae982073095a44f004d7ffb9f271077abef9dbcf.tar.xz
Merge tag 'pm+acpi-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management and ACPI updates from Rafael Wysocki: "From the number of commits perspective, the biggest items are ACPICA and cpufreq changes with the latter taking the lead (over 50 commits). On the cpufreq front, there are many cleanups and minor fixes in the core and governors, driver updates etc. We also have a new cpufreq driver for Mediatek MT8173 chips. ACPICA mostly updates its debug infrastructure and adds a number of fixes and cleanups for a good measure. The Operating Performance Points (OPP) framework is updated with new DT bindings and support for them among other things. We have a few updates of the generic power domains framework and a reorganization of the ACPI device enumeration code and bus type operations. And a lot of fixes and cleanups all over. Included is one branch from the MFD tree as it contains some PM-related driver core and ACPI PM changes a few other commits are based on. Specifics: - ACPICA update to upstream revision 20150818 including method tracing extensions to allow more in-depth AML debugging in the kernel and a number of assorted fixes and cleanups (Bob Moore, Lv Zheng, Markus Elfring). - ACPI sysfs code updates and a documentation update related to AML method tracing (Lv Zheng). - ACPI EC driver fix related to serialized evaluations of _Qxx methods and ACPI tools updates allowing the EC userspace tool to be built from the kernel source (Lv Zheng). - ACPI processor driver updates preparing it for future introduction of CPPC support and ACPI PCC mailbox driver updates (Ashwin Chaugule). - ACPI interrupts enumeration fix for a regression related to the handling of IRQ attribute conflicts between MADT and the ACPI namespace (Jiang Liu). - Fixes related to ACPI device PM (Mika Westerberg, Srinidhi Kasagar). - ACPI device registration code reorganization to separate the sysfs-related code and bus type operations from the rest (Rafael J Wysocki). 
- Assorted cleanups in the ACPI core (Jarkko Nikula, Mathias Krause, Andy Shevchenko, Rafael J Wysocki, Nicolas Iooss). - ACPI cpufreq driver and ia64 cpufreq driver fixes and cleanups (Pan Xinhui, Rafael J Wysocki). - cpufreq core cleanups on top of the previous changes allowing it to preserve its sysfs directories over system suspend/resume (Viresh Kumar, Rafael J Wysocki, Sebastian Andrzej Siewior). - cpufreq fixes and cleanups related to governors (Viresh Kumar). - cpufreq updates (core and the cpufreq-dt driver) related to the turbo/boost mode support (Viresh Kumar, Bartlomiej Zolnierkiewicz). - New DT bindings for Operating Performance Points (OPP), support for them in the OPP framework and in the cpufreq-dt driver plus related OPP framework fixes and cleanups (Viresh Kumar). - cpufreq powernv driver updates (Shilpasri G Bhat). - New cpufreq driver for Mediatek MT8173 (Pi-Cheng Chen). - Assorted cpufreq driver (speedstep-lib, sfi, integrator) cleanups and fixes (Abhilash Jindal, Andrzej Hajda, Cristian Ardelean). - intel_pstate driver updates including Skylake-S support, support for enabling HW P-states per CPU and an additional vendor bypass list entry (Kristen Carlson Accardi, Chen Yu, Ethan Zhao). - cpuidle core fixes related to the handling of coupled idle states (Xunlei Pang). - intel_idle driver updates including Skylake Client support and support for freeze-mode-specific idle states (Len Brown). - Driver core updates related to power management (Andy Shevchenko, Rafael J Wysocki). - Generic power domains framework fixes and cleanups (Jon Hunter, Geert Uytterhoeven, Rajendra Nayak, Ulf Hansson). - Device PM QoS framework update to allow the latency tolerance setting to be exposed to user space via sysfs (Mika Westerberg). - devfreq support for PPMUv2 in Exynos5433 and a fix for an incorrect exynos-ppmu DT binding (Chanwoo Choi, Javier Martinez Canillas). - System sleep support updates (Alan Stern, Len Brown, SungEun Kim). 
- rockchip-io AVS support updates (Heiko Stuebner). - PM core clocks support fixup (Colin Ian King). - Power capping RAPL driver update including support for Skylake H/S and Broadwell-H (Radivoje Jovanovic, Seiichi Ikarashi). - Generic device properties framework fixes related to the handling of static (driver-provided) property sets (Andy Shevchenko). - turbostat and cpupower updates (Len Brown, Shilpasri G Bhat, Shreyas B Prabhu)" * tag 'pm+acpi-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (180 commits) cpufreq: speedstep-lib: Use monotonic clock cpufreq: powernv: Increase the verbosity of OCC console messages cpufreq: sfi: use kmemdup rather than duplicating its implementation cpufreq: drop !cpufreq_driver check from cpufreq_parse_governor() cpufreq: rename cpufreq_real_policy as cpufreq_user_policy cpufreq: remove redundant 'policy' field from user_policy cpufreq: remove redundant 'governor' field from user_policy cpufreq: update user_policy.* on success cpufreq: use memcpy() to copy policy cpufreq: remove redundant CPUFREQ_INCOMPATIBLE notifier event cpufreq: mediatek: Add MT8173 cpufreq driver dt-bindings: mediatek: Add MT8173 CPU DVFS clock bindings PM / Domains: Fix typo in description of genpd_dev_pm_detach() PM / Domains: Remove unusable governor dummies PM / Domains: Make pm_genpd_init() available to modules PM / domains: Align column headers and data in pm_genpd_summary output powercap / RAPL: disable the 2nd power limit properly tools: cpupower: Fix error when running cpupower monitor PM / OPP: Drop unlikely before IS_ERR(_OR_NULL) PM / OPP: Fix static checker warning (broken 64bit big endian systems) ...
Diffstat (limited to 'drivers/cpufreq/powernv-cpufreq.c')
-rw-r--r-- drivers/cpufreq/powernv-cpufreq.c 199
1 files changed, 184 insertions, 15 deletions
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index ebef0d8279c7..64994e10638e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -27,20 +27,31 @@
#include <linux/smp.h>
#include <linux/of.h>
#include <linux/reboot.h>
+#include <linux/slab.h>
#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/reg.h>
#include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
+#include <asm/opal.h>
#define POWERNV_MAX_PSTATES 256
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
#define PMSR_MAX(x) ((x >> 32) & 0xFF)
-#define PMSR_LP(x) ((x >> 48) & 0xFF)
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
-static bool rebooting, throttled;
+static bool rebooting, throttled, occ_reset;
+
+/*
+ * Per-chip throttle state.  The 'chips' array holds nr_chips entries and is
+ * filled in by init_chip_info().
+ */
+struct chip {
+	/* Chip id; matched against cpu_to_chip_id() and the OCC message chip */
+	unsigned int id;
+	/* True while this chip's Pmax is seen below powernv_pstate_info.max */
+	bool throttled;
+	/* CPUs of this chip; target set for the cross-CPU throttle check */
+	cpumask_t mask;
+	/* Deferred work item, handled by powernv_cpufreq_work_fn() */
+	struct work_struct throttle;
+	/* Ask the work handler to re-apply the current policy frequency */
+	bool restore;
+};
+
+static struct chip *chips;
+
+static int nr_chips;
/*
* Note: The set of pstates consists of contiguous integers, the
@@ -298,28 +309,35 @@ static inline unsigned int get_nominal_index(void)
return powernv_pstate_info.max - powernv_pstate_info.nominal;
}
-static void powernv_cpufreq_throttle_check(unsigned int cpu)
+static void powernv_cpufreq_throttle_check(void *data)
{
+ unsigned int cpu = smp_processor_id();
unsigned long pmsr;
- int pmsr_pmax, pmsr_lp;
+ int pmsr_pmax, i;
pmsr = get_pmspr(SPRN_PMSR);
+ for (i = 0; i < nr_chips; i++)
+ if (chips[i].id == cpu_to_chip_id(cpu))
+ break;
+
/* Check for Pmax Capping */
pmsr_pmax = (s8)PMSR_MAX(pmsr);
if (pmsr_pmax != powernv_pstate_info.max) {
- throttled = true;
- pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax);
- pr_info("Max allowed Pstate is capped\n");
+ if (chips[i].throttled)
+ goto next;
+ chips[i].throttled = true;
+ pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu,
+ chips[i].id, pmsr_pmax);
+ } else if (chips[i].throttled) {
+ chips[i].throttled = false;
+ pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu,
+ chips[i].id, pmsr_pmax);
}
- /*
- * Check for Psafe by reading LocalPstate
- * or check if Psafe_mode_active is set in PMSR.
- */
- pmsr_lp = (s8)PMSR_LP(pmsr);
- if ((pmsr_lp < powernv_pstate_info.min) ||
- (pmsr & PMSR_PSAFE_ENABLE)) {
+ /* Check if Psafe_mode_active is set in PMSR. */
+next:
+ if (pmsr & PMSR_PSAFE_ENABLE) {
throttled = true;
pr_info("Pstate set to safe frequency\n");
}
@@ -350,7 +368,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
return 0;
if (!throttled)
- powernv_cpufreq_throttle_check(smp_processor_id());
+ powernv_cpufreq_throttle_check(NULL);
freq_data.pstate_id = powernv_freqs[new_index].driver_data;
@@ -395,6 +413,119 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
.notifier_call = powernv_cpufreq_reboot_notifier,
};
+/*
+ * powernv_cpufreq_work_fn - deferred throttle handling for one chip.
+ * @work: the 'throttle' work item embedded in a struct chip.
+ *
+ * Runs powernv_cpufreq_throttle_check() on a CPU of the chip.  If the chip's
+ * 'restore' flag is set, it is cleared and the current frequency of every
+ * policy that has an online CPU on the chip is programmed again.
+ */
+void powernv_cpufreq_work_fn(struct work_struct *work)
+{
+	struct chip *chip = container_of(work, struct chip, throttle);
+	unsigned int cpu;
+	/*
+	 * A plain cpumask_t lives on the stack and needs no allocation.  A
+	 * cpumask_var_t would be an uninitialised pointer here when
+	 * CONFIG_CPUMASK_OFFSTACK is enabled.
+	 */
+	cpumask_t mask;
+
+	smp_call_function_any(&chip->mask,
+			      powernv_cpufreq_throttle_check, NULL, 0);
+
+	if (!chip->restore)
+		return;
+
+	chip->restore = false;
+	cpumask_and(&mask, &chip->mask, cpu_online_mask);
+	for_each_cpu(cpu, &mask) {
+		int index;
+		struct cpufreq_policy policy;
+
+		/* Skip CPUs without a usable policy instead of using garbage */
+		if (cpufreq_get_policy(&policy, cpu))
+			continue;
+		if (cpufreq_frequency_table_target(&policy, policy.freq_table,
+						   policy.cur,
+						   CPUFREQ_RELATION_C, &index))
+			continue;
+
+		powernv_cpufreq_target_index(&policy, index);
+		/* Every CPU sharing this policy is now handled */
+		cpumask_andnot(&mask, &mask, policy.cpus);
+	}
+}
+
+/* Text for each OCC throttle status, indexed by the status value. */
+static const char throttle_reason[][30] = {
+	"No throttling",
+	"Power Cap",
+	"Processor Over Temperature",
+	"Power Supply Failure",
+	"Over Current",
+	"OCC Reset"
+};
+
+/*
+ * powernv_cpufreq_occ_msg - notifier callback for OPAL OCC messages.
+ * @nb:       notifier block (unused).
+ * @msg_type: OPAL message type; anything but OPAL_MSG_OCC is ignored.
+ * @_msg:     the struct opal_msg; params[0] is the OCC message type and, for
+ *            OCC_THROTTLE, params[1] is the chip and params[2] the status.
+ *
+ * Logs the OCC state change, updates the global throttled/occ_reset flags and
+ * schedules the per-chip throttle work where needed.
+ *
+ * Always returns 0.
+ */
+static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
+				   unsigned long msg_type, void *_msg)
+{
+	struct opal_msg *msg = _msg;
+	struct opal_occ_msg omsg;
+	int i;
+
+	if (msg_type != OPAL_MSG_OCC)
+		return 0;
+
+	omsg.type = be64_to_cpu(msg->params[0]);
+
+	switch (omsg.type) {
+	case OCC_RESET:
+		occ_reset = true;
+		pr_info("OCC (On Chip Controller - enforces hard thermal/power limits) Resetting\n");
+		/*
+		 * powernv_cpufreq_throttle_check() is called in
+		 * target() callback which can detect the throttle state
+		 * for governors like ondemand.
+		 * But static governors will not call target() often thus
+		 * report throttling here.
+		 */
+		if (!throttled) {
+			throttled = true;
+			pr_crit("CPU frequency is throttled for duration\n");
+		}
+		break;
+	case OCC_LOAD:
+		pr_info("OCC Loading, CPU frequency is throttled until OCC is started\n");
+		break;
+	case OCC_THROTTLE:
+		omsg.chip = be64_to_cpu(msg->params[1]);
+		omsg.throttle_status = be64_to_cpu(msg->params[2]);
+
+		/* First throttle message after a reset: the OCC is back */
+		if (occ_reset) {
+			occ_reset = false;
+			throttled = false;
+			pr_info("OCC Active, CPU frequency is no longer throttled\n");
+
+			for (i = 0; i < nr_chips; i++) {
+				chips[i].restore = true;
+				schedule_work(&chips[i].throttle);
+			}
+
+			return 0;
+		}
+
+		/*
+		 * Ignore statuses we have no text for.  The second test keeps
+		 * the lookup in bounds even if OCC_MAX_THROTTLE_STATUS and the
+		 * table ever get out of step.
+		 */
+		if (omsg.throttle_status > OCC_MAX_THROTTLE_STATUS ||
+		    omsg.throttle_status >= ARRAY_SIZE(throttle_reason))
+			return 0;
+
+		if (omsg.throttle_status)
+			pr_info("OCC: Chip %u Pmax reduced due to %s\n",
+				(unsigned int)omsg.chip,
+				throttle_reason[omsg.throttle_status]);
+		else
+			pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip,
+				throttle_reason[omsg.throttle_status]);
+
+		for (i = 0; i < nr_chips; i++) {
+			if (chips[i].id != omsg.chip)
+				continue;
+			/* Status 0 means throttling ended: restore frequency */
+			if (!omsg.throttle_status)
+				chips[i].restore = true;
+			schedule_work(&chips[i].throttle);
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Notifier block that routes OPAL messages to powernv_cpufreq_occ_msg().
+ * Members not named here are zero because the object has static storage.
+ */
+static struct notifier_block powernv_cpufreq_opal_nb = {
+	.notifier_call	= powernv_cpufreq_occ_msg,
+};
+
static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
struct powernv_smp_call_data freq_data;
@@ -414,6 +545,36 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
.attr = powernv_cpu_freq_attr,
};
+/*
+ * init_chip_info - discover the chips and allocate their throttle state.
+ *
+ * Collects the distinct cpu_to_chip_id() values over all possible CPUs,
+ * allocates one zeroed struct chip per id and records in each chip's mask the
+ * CPUs that map to it.  'chips' and 'nr_chips' are only published once
+ * everything has succeeded.
+ *
+ * Returns 0 on success, -EINVAL if there are more distinct chip ids than the
+ * scratch table can hold, or -ENOMEM if the allocation fails.
+ */
+static int init_chip_info(void)
+{
+	unsigned int chip[256];
+	unsigned int cpu, i;
+	unsigned int count = 0;
+	struct chip *new_chips;
+
+	/* Pass 1: gather the distinct chip ids */
+	for_each_possible_cpu(cpu) {
+		unsigned int id = cpu_to_chip_id(cpu);
+
+		/* Search all known ids so non-adjacent CPUs do not add duplicates */
+		for (i = 0; i < count; i++)
+			if (chip[i] == id)
+				break;
+		if (i < count)
+			continue;
+
+		/* Refuse to write past the end of the scratch table */
+		if (count == ARRAY_SIZE(chip))
+			return -EINVAL;
+		chip[count++] = id;
+	}
+
+	/* Zeroed allocation: throttled/restore start false, masks start empty */
+	new_chips = kcalloc(count, sizeof(*new_chips), GFP_KERNEL);
+	if (!new_chips)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		new_chips[i].id = chip[i];
+		INIT_WORK(&new_chips[i].throttle, powernv_cpufreq_work_fn);
+	}
+
+	/*
+	 * Pass 2: build each mask from the same cpu -> chip id mapping that is
+	 * used for lookups, rather than assuming chip id == NUMA node id.
+	 */
+	for_each_possible_cpu(cpu) {
+		unsigned int id = cpu_to_chip_id(cpu);
+
+		for (i = 0; i < count; i++) {
+			if (new_chips[i].id == id) {
+				cpumask_set_cpu(cpu, &new_chips[i].mask);
+				break;
+			}
+		}
+	}
+
+	chips = new_chips;
+	nr_chips = count;
+
+	return 0;
+}
+
static int __init powernv_cpufreq_init(void)
{
int rc = 0;
@@ -429,7 +590,13 @@ static int __init powernv_cpufreq_init(void)
return rc;
}
+ /* Populate chip info */
+ rc = init_chip_info();
+ if (rc)
+ return rc;
+
register_reboot_notifier(&powernv_cpufreq_reboot_nb);
+ opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
return cpufreq_register_driver(&powernv_cpufreq_driver);
}
module_init(powernv_cpufreq_init);
@@ -437,6 +604,8 @@ module_init(powernv_cpufreq_init);
static void __exit powernv_cpufreq_exit(void)
{
unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
+ opal_message_notifier_unregister(OPAL_MSG_OCC,
+ &powernv_cpufreq_opal_nb);
cpufreq_unregister_driver(&powernv_cpufreq_driver);
}
module_exit(powernv_cpufreq_exit);