summaryrefslogtreecommitdiff
path: root/drivers/thermal
diff options
context:
space:
mode:
author Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2023-02-15 19:18:08 +0300
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2023-02-15 19:18:08 +0300
commit badf1f90502d3fc627a18880dfafd8c636699baf (patch)
tree d069f30768766fa721b6f311351de2e0df53f8c1 /drivers/thermal
parent c3bd6d539f34784b235c7070edba978f67516372 (diff)
parent fef1f0be10c61dd16fd370964c316c399483448f (diff)
download linux-badf1f90502d3fc627a18880dfafd8c636699baf.tar.xz
Merge branch 'thermal-intel'
Merge thermal control changes related to Intel platforms for 6.3-rc1: - Rework ACPI helper functions for thermal control to retrieve a trip point temperature instead of initializing a trip point object (Rafael Wysocki). - Clean up and improve the int340x thermal driver (Rafael Wysocki). - Simplify and clean up the intel_pch thermal driver (Rafael Wysocki). - Fix the Intel powerclamp thermal driver and make it use the common idle injection framework (Srinivas Pandruvada). - Add two module parameters, cpumask and max_idle, to the Intel powerclamp thermal driver to allow it to affect only a specific subset of CPUs instead of all of them (Srinivas Pandruvada). - Make the Intel quark_dts thermal driver use generic trip point objects instead of its own trip point representation (Daniel Lezcano). - Add toctree entry for thermal documents and fix two issues in the Intel powerclamp driver documentation (Bagas Sanjaya). * thermal-intel: (25 commits) Documentation: powerclamp: Fix numbered lists formatting Documentation: powerclamp: Escape wildcard in cpumask description Documentation: admin-guide: Add toctree entry for thermal docs thermal: intel: powerclamp: Add two module parameters Documentation: admin-guide: Move intel_powerclamp documentation thermal: intel: powerclamp: Fix duration module parameter thermal: intel: powerclamp: Return last requested state as cur_state thermal: intel: quark_dts: Use generic trip points thermal: intel: powerclamp: Use powercap idle-inject feature powercap: idle_inject: Add update callback powercap: idle_inject: Export symbols thermal: intel: powerclamp: Fix cur_state for multi package system thermal: intel: intel_pch: Drop struct board_info thermal: intel: intel_pch: Rename board ID symbols thermal: intel: intel_pch: Fold suspend and resume routines into their callers thermal: intel: intel_pch: Fold two functions into their callers thermal: intel: intel_pch: Eliminate device operations object thermal: intel: intel_pch: Rename device 
operations callbacks thermal: intel: intel_pch: Eliminate redundant return pointers thermal: intel: intel_pch: Make pch_wpt_add_acpi_psv_trip() return int ...
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- drivers/thermal/intel/Kconfig 3
-rw-r--r-- drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c 127
-rw-r--r-- drivers/thermal/intel/intel_pch_thermal.c 352
-rw-r--r-- drivers/thermal/intel/intel_powerclamp.c 545
-rw-r--r-- drivers/thermal/intel/intel_quark_dts_thermal.c 55
-rw-r--r-- drivers/thermal/thermal_acpi.c 108
6 files changed, 580 insertions, 610 deletions
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index e50fd260484a..b5808f92702d 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -3,6 +3,9 @@ config INTEL_POWERCLAMP
tristate "Intel PowerClamp idle injection driver"
depends on X86
depends on CPU_SUP_INTEL
+ depends on CPU_IDLE
+ select POWERCAP
+ select IDLE_INJECT
help
Enable this to enable Intel PowerClamp idle injection driver. This
enforce idle time which results in more package C-state residency. The
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
index 09b1b51eb6a5..00665967ca52 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -29,24 +29,27 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone,
if (conv_temp < 0)
return conv_temp;
- *temp = (unsigned long)conv_temp * 10;
- } else
+ *temp = conv_temp * 10;
+ } else {
/* _TMP returns the temperature in tenths of degrees Kelvin */
*temp = deci_kelvin_to_millicelsius(tmp);
+ }
return 0;
}
static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone,
- int trip, int temp)
+ int trip, int temp)
{
struct int34x_thermal_zone *d = zone->devdata;
+ char name[] = {'P', 'A', 'T', '0' + trip, '\0'};
acpi_status status;
- char name[10];
- snprintf(name, sizeof(name), "PAT%d", trip);
+ if (trip > 9)
+ return -EINVAL;
+
status = acpi_execute_simple_method(d->adev->handle, name,
- millicelsius_to_deci_kelvin(temp));
+ millicelsius_to_deci_kelvin(temp));
if (ACPI_FAILURE(status))
return -EIO;
@@ -70,24 +73,34 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev,
{
int i, ret;
- ret = thermal_acpi_trip_critical(zone_adev, &zone_trips[trip_cnt]);
- if (!ret)
+ ret = thermal_acpi_critical_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_CRITICAL;
trip_cnt++;
+ }
- ret = thermal_acpi_trip_hot(zone_adev, &zone_trips[trip_cnt]);
- if (!ret)
+ ret = thermal_acpi_hot_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_HOT;
trip_cnt++;
+ }
- ret = thermal_acpi_trip_passive(zone_adev, &zone_trips[trip_cnt]);
- if (!ret)
+ ret = thermal_acpi_passive_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_PASSIVE;
trip_cnt++;
+ }
for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) {
-
- ret = thermal_acpi_trip_active(zone_adev, i, &zone_trips[trip_cnt]);
+ ret = thermal_acpi_active_trip_temp(zone_adev, i,
+ &zone_trips[trip_cnt].temperature);
if (ret)
break;
+ zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE;
trip_cnt++;
}
@@ -102,7 +115,7 @@ static struct thermal_zone_params int340x_thermal_params = {
struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
int (*get_temp) (struct thermal_zone_device *, int *))
{
- struct int34x_thermal_zone *int34x_thermal_zone;
+ struct int34x_thermal_zone *int34x_zone;
struct thermal_trip *zone_trips;
unsigned long long trip_cnt = 0;
unsigned long long hyst;
@@ -110,26 +123,25 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
acpi_status status;
int i, ret;
- int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone),
- GFP_KERNEL);
- if (!int34x_thermal_zone)
+ int34x_zone = kzalloc(sizeof(*int34x_zone), GFP_KERNEL);
+ if (!int34x_zone)
return ERR_PTR(-ENOMEM);
- int34x_thermal_zone->adev = adev;
+ int34x_zone->adev = adev;
- int34x_thermal_zone->ops = kmemdup(&int340x_thermal_zone_ops,
- sizeof(int340x_thermal_zone_ops), GFP_KERNEL);
- if (!int34x_thermal_zone->ops) {
+ int34x_zone->ops = kmemdup(&int340x_thermal_zone_ops,
+ sizeof(int340x_thermal_zone_ops), GFP_KERNEL);
+ if (!int34x_zone->ops) {
ret = -ENOMEM;
goto err_ops_alloc;
}
if (get_temp)
- int34x_thermal_zone->ops->get_temp = get_temp;
+ int34x_zone->ops->get_temp = get_temp;
status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt);
- if (!ACPI_FAILURE(status)) {
- int34x_thermal_zone->aux_trip_nr = trip_cnt;
+ if (ACPI_SUCCESS(status)) {
+ int34x_zone->aux_trip_nr = trip_cnt;
trip_mask = BIT(trip_cnt) - 1;
}
@@ -156,49 +168,47 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
for (i = 0; i < trip_cnt; ++i)
zone_trips[i].hysteresis = hyst;
- int34x_thermal_zone->trips = zone_trips;
+ int34x_zone->trips = zone_trips;
- int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table(
- adev->handle);
+ int34x_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle);
- int34x_thermal_zone->zone = thermal_zone_device_register_with_trips(
- acpi_device_bid(adev),
- zone_trips, trip_cnt,
- trip_mask, int34x_thermal_zone,
- int34x_thermal_zone->ops,
- &int340x_thermal_params,
- 0, 0);
- if (IS_ERR(int34x_thermal_zone->zone)) {
- ret = PTR_ERR(int34x_thermal_zone->zone);
+ int34x_zone->zone = thermal_zone_device_register_with_trips(
+ acpi_device_bid(adev),
+ zone_trips, trip_cnt,
+ trip_mask, int34x_zone,
+ int34x_zone->ops,
+ &int340x_thermal_params,
+ 0, 0);
+ if (IS_ERR(int34x_zone->zone)) {
+ ret = PTR_ERR(int34x_zone->zone);
goto err_thermal_zone;
}
- ret = thermal_zone_device_enable(int34x_thermal_zone->zone);
+ ret = thermal_zone_device_enable(int34x_zone->zone);
if (ret)
goto err_enable;
- return int34x_thermal_zone;
+ return int34x_zone;
err_enable:
- thermal_zone_device_unregister(int34x_thermal_zone->zone);
+ thermal_zone_device_unregister(int34x_zone->zone);
err_thermal_zone:
- kfree(int34x_thermal_zone->trips);
- acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
+ kfree(int34x_zone->trips);
+ acpi_lpat_free_conversion_table(int34x_zone->lpat_table);
err_trips_alloc:
- kfree(int34x_thermal_zone->ops);
+ kfree(int34x_zone->ops);
err_ops_alloc:
- kfree(int34x_thermal_zone);
+ kfree(int34x_zone);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(int340x_thermal_zone_add);
-void int340x_thermal_zone_remove(struct int34x_thermal_zone
- *int34x_thermal_zone)
+void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone)
{
- thermal_zone_device_unregister(int34x_thermal_zone->zone);
- acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
- kfree(int34x_thermal_zone->trips);
- kfree(int34x_thermal_zone->ops);
- kfree(int34x_thermal_zone);
+ thermal_zone_device_unregister(int34x_zone->zone);
+ acpi_lpat_free_conversion_table(int34x_zone->lpat_table);
+ kfree(int34x_zone->trips);
+ kfree(int34x_zone->ops);
+ kfree(int34x_zone);
}
EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
@@ -213,22 +223,21 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone)
mutex_lock(&int34x_zone->zone->lock);
for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) {
- struct thermal_trip trip;
- int err;
+ int temp, err;
switch (zone_trips[i].type) {
case THERMAL_TRIP_CRITICAL:
- err = thermal_acpi_trip_critical(zone_adev, &trip);
+ err = thermal_acpi_critical_trip_temp(zone_adev, &temp);
break;
case THERMAL_TRIP_HOT:
- err = thermal_acpi_trip_hot(zone_adev, &trip);
+ err = thermal_acpi_hot_trip_temp(zone_adev, &temp);
break;
case THERMAL_TRIP_PASSIVE:
- err = thermal_acpi_trip_passive(zone_adev, &trip);
+ err = thermal_acpi_passive_trip_temp(zone_adev, &temp);
break;
case THERMAL_TRIP_ACTIVE:
- err = thermal_acpi_trip_active(zone_adev, act_trip_nr++,
- &trip);
+ err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++,
+ &temp);
break;
default:
err = -ENODEV;
@@ -238,7 +247,7 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone)
continue;
}
- zone_trips[i].temperature = trip.temperature;
+ zone_trips[i].temperature = temp;
}
mutex_unlock(&int34x_zone->zone->lock);
diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c
index 45a9ea86907e..b855d031a855 100644
--- a/drivers/thermal/intel/intel_pch_thermal.c
+++ b/drivers/thermal/intel/intel_pch_thermal.c
@@ -82,7 +82,6 @@ static char driver_name[] = "Intel PCH thermal driver";
struct pch_thermal_device {
void __iomem *hw_base;
- const struct pch_dev_ops *ops;
struct pci_dev *pdev;
struct thermal_zone_device *tzd;
struct thermal_trip trips[PCH_MAX_TRIPS];
@@ -90,42 +89,107 @@ struct pch_thermal_device {
};
#ifdef CONFIG_ACPI
-
/*
* On some platforms, there is a companion ACPI device, which adds
* passive trip temperature using _PSV method. There is no specific
* passive temperature setting in MMIO interface of this PCI device.
*/
-static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
- int *nr_trips)
+static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip)
{
struct acpi_device *adev;
- int ret;
+ int temp;
adev = ACPI_COMPANION(&ptd->pdev->dev);
if (!adev)
- return;
+ return 0;
- ret = thermal_acpi_trip_passive(adev, &ptd->trips[*nr_trips]);
- if (ret || ptd->trips[*nr_trips].temperature <= 0)
- return;
+ if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0)
+ return 0;
- ++(*nr_trips);
+ ptd->trips[trip].type = THERMAL_TRIP_PASSIVE;
+ ptd->trips[trip].temperature = temp;
+ return 1;
}
#else
-static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
- int *nr_trips)
+static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip)
{
-
+ return 0;
}
#endif
-static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
+static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
{
- u8 tsel;
+ struct pch_thermal_device *ptd = tzd->devdata;
+
+ *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+ return 0;
+}
+
+static void pch_critical(struct thermal_zone_device *tzd)
+{
+ dev_dbg(&tzd->device, "%s: critical temperature reached\n", tzd->type);
+}
+
+static struct thermal_zone_device_ops tzd_ops = {
+ .get_temp = pch_thermal_get_temp,
+ .critical = pch_critical,
+};
+
+enum pch_board_ids {
+ PCH_BOARD_HSW = 0,
+ PCH_BOARD_WPT,
+ PCH_BOARD_SKL,
+ PCH_BOARD_CNL,
+ PCH_BOARD_CML,
+ PCH_BOARD_LWB,
+ PCH_BOARD_WBG,
+};
+
+static const char *board_names[] = {
+ [PCH_BOARD_HSW] = "pch_haswell",
+ [PCH_BOARD_WPT] = "pch_wildcat_point",
+ [PCH_BOARD_SKL] = "pch_skylake",
+ [PCH_BOARD_CNL] = "pch_cannonlake",
+ [PCH_BOARD_CML] = "pch_cometlake",
+ [PCH_BOARD_LWB] = "pch_lewisburg",
+ [PCH_BOARD_WBG] = "pch_wellsburg",
+};
+
+static int intel_pch_thermal_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ enum pch_board_ids board_id = id->driver_data;
+ struct pch_thermal_device *ptd;
+ int nr_trips = 0;
u16 trip_temp;
+ u8 tsel;
+ int err;
+
+ ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL);
+ if (!ptd)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, ptd);
+ ptd->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable pci device\n");
+ return err;
+ }
+
+ err = pci_request_regions(pdev, driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "failed to request pci region\n");
+ goto error_disable;
+ }
- *nr_trips = 0;
+ ptd->hw_base = pci_ioremap_bar(pdev, 0);
+ if (!ptd->hw_base) {
+ err = -ENOMEM;
+ dev_err(&pdev->dev, "failed to map mem base\n");
+ goto error_release;
+ }
/* Check if BIOS has already enabled thermal sensor */
if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) {
@@ -140,50 +204,79 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
*/
if (tsel & WPT_TSEL_PLDB) {
dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
- return -ENODEV;
+ err = -ENODEV;
+ goto error_cleanup;
}
writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) {
dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
- return -ENODEV;
+ err = -ENODEV;
+ goto error_cleanup;
}
read_trips:
trip_temp = readw(ptd->hw_base + WPT_CTT);
trip_temp &= 0x1FF;
if (trip_temp) {
- ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp);
- ptd->trips[*nr_trips].type = THERMAL_TRIP_CRITICAL;
- ++(*nr_trips);
+ ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp);
+ ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL;
}
trip_temp = readw(ptd->hw_base + WPT_PHL);
trip_temp &= 0x1FF;
if (trip_temp) {
- ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp);
- ptd->trips[*nr_trips].type = THERMAL_TRIP_HOT;
- ++(*nr_trips);
+ ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp);
+ ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT;
}
- pch_wpt_add_acpi_psv_trip(ptd, nr_trips);
+ nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips);
+
+ ptd->tzd = thermal_zone_device_register_with_trips(board_names[board_id],
+ ptd->trips, nr_trips,
+ 0, ptd, &tzd_ops,
+ NULL, 0, 0);
+ if (IS_ERR(ptd->tzd)) {
+ dev_err(&pdev->dev, "Failed to register thermal zone %s\n",
+ board_names[board_id]);
+ err = PTR_ERR(ptd->tzd);
+ goto error_cleanup;
+ }
+ err = thermal_zone_device_enable(ptd->tzd);
+ if (err)
+ goto err_unregister;
return 0;
+
+err_unregister:
+ thermal_zone_device_unregister(ptd->tzd);
+error_cleanup:
+ iounmap(ptd->hw_base);
+error_release:
+ pci_release_regions(pdev);
+error_disable:
+ pci_disable_device(pdev);
+ dev_err(&pdev->dev, "pci device failed to probe\n");
+ return err;
}
-static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
+static void intel_pch_thermal_remove(struct pci_dev *pdev)
{
- *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+ struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
- return 0;
+ thermal_zone_device_unregister(ptd->tzd);
+ iounmap(ptd->hw_base);
+ pci_set_drvdata(pdev, NULL);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
}
-/* Cool the PCH when it's overheat in .suspend_noirq phase */
-static int pch_wpt_suspend(struct pch_thermal_device *ptd)
+static int intel_pch_thermal_suspend_noirq(struct device *device)
{
- u8 tsel;
- int pch_delay_cnt = 0;
+ struct pch_thermal_device *ptd = dev_get_drvdata(device);
u16 pch_thr_temp, pch_cur_temp;
+ int pch_delay_cnt = 0;
+ u8 tsel;
/* Shutdown the thermal sensor if it is not enabled by BIOS */
if (!ptd->bios_enabled) {
@@ -246,8 +339,9 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd)
return 0;
}
-static int pch_wpt_resume(struct pch_thermal_device *ptd)
+static int intel_pch_thermal_resume(struct device *device)
{
+ struct pch_thermal_device *ptd = dev_get_drvdata(device);
u8 tsel;
if (ptd->bios_enabled)
@@ -260,199 +354,29 @@ static int pch_wpt_resume(struct pch_thermal_device *ptd)
return 0;
}
-struct pch_dev_ops {
- int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
- int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
- int (*suspend)(struct pch_thermal_device *ptd);
- int (*resume)(struct pch_thermal_device *ptd);
-};
-
-
-/* dev ops for Wildcat Point */
-static const struct pch_dev_ops pch_dev_ops_wpt = {
- .hw_init = pch_wpt_init,
- .get_temp = pch_wpt_get_temp,
- .suspend = pch_wpt_suspend,
- .resume = pch_wpt_resume,
-};
-
-static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
-{
- struct pch_thermal_device *ptd = tzd->devdata;
-
- return ptd->ops->get_temp(ptd, temp);
-}
-
-static void pch_critical(struct thermal_zone_device *tzd)
-{
- dev_dbg(&tzd->device, "%s: critical temperature reached\n", tzd->type);
-}
-
-static struct thermal_zone_device_ops tzd_ops = {
- .get_temp = pch_thermal_get_temp,
- .critical = pch_critical,
-};
-
-enum board_ids {
- board_hsw,
- board_wpt,
- board_skl,
- board_cnl,
- board_cml,
- board_lwb,
- board_wbg,
-};
-
-static const struct board_info {
- const char *name;
- const struct pch_dev_ops *ops;
-} board_info[] = {
- [board_hsw] = {
- .name = "pch_haswell",
- .ops = &pch_dev_ops_wpt,
- },
- [board_wpt] = {
- .name = "pch_wildcat_point",
- .ops = &pch_dev_ops_wpt,
- },
- [board_skl] = {
- .name = "pch_skylake",
- .ops = &pch_dev_ops_wpt,
- },
- [board_cnl] = {
- .name = "pch_cannonlake",
- .ops = &pch_dev_ops_wpt,
- },
- [board_cml] = {
- .name = "pch_cometlake",
- .ops = &pch_dev_ops_wpt,
- },
- [board_lwb] = {
- .name = "pch_lewisburg",
- .ops = &pch_dev_ops_wpt,
- },
- [board_wbg] = {
- .name = "pch_wellsburg",
- .ops = &pch_dev_ops_wpt,
- },
-};
-
-static int intel_pch_thermal_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- enum board_ids board_id = id->driver_data;
- const struct board_info *bi = &board_info[board_id];
- struct pch_thermal_device *ptd;
- int err;
- int nr_trips;
-
- ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL);
- if (!ptd)
- return -ENOMEM;
-
- ptd->ops = bi->ops;
-
- pci_set_drvdata(pdev, ptd);
- ptd->pdev = pdev;
-
- err = pci_enable_device(pdev);
- if (err) {
- dev_err(&pdev->dev, "failed to enable pci device\n");
- return err;
- }
-
- err = pci_request_regions(pdev, driver_name);
- if (err) {
- dev_err(&pdev->dev, "failed to request pci region\n");
- goto error_disable;
- }
-
- ptd->hw_base = pci_ioremap_bar(pdev, 0);
- if (!ptd->hw_base) {
- err = -ENOMEM;
- dev_err(&pdev->dev, "failed to map mem base\n");
- goto error_release;
- }
-
- err = ptd->ops->hw_init(ptd, &nr_trips);
- if (err)
- goto error_cleanup;
-
- ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips,
- nr_trips, 0, ptd,
- &tzd_ops, NULL, 0, 0);
- if (IS_ERR(ptd->tzd)) {
- dev_err(&pdev->dev, "Failed to register thermal zone %s\n",
- bi->name);
- err = PTR_ERR(ptd->tzd);
- goto error_cleanup;
- }
- err = thermal_zone_device_enable(ptd->tzd);
- if (err)
- goto err_unregister;
-
- return 0;
-
-err_unregister:
- thermal_zone_device_unregister(ptd->tzd);
-error_cleanup:
- iounmap(ptd->hw_base);
-error_release:
- pci_release_regions(pdev);
-error_disable:
- pci_disable_device(pdev);
- dev_err(&pdev->dev, "pci device failed to probe\n");
- return err;
-}
-
-static void intel_pch_thermal_remove(struct pci_dev *pdev)
-{
- struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
-
- thermal_zone_device_unregister(ptd->tzd);
- iounmap(ptd->hw_base);
- pci_set_drvdata(pdev, NULL);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-}
-
-static int intel_pch_thermal_suspend_noirq(struct device *device)
-{
- struct pch_thermal_device *ptd = dev_get_drvdata(device);
-
- return ptd->ops->suspend(ptd);
-}
-
-static int intel_pch_thermal_resume(struct device *device)
-{
- struct pch_thermal_device *ptd = dev_get_drvdata(device);
-
- return ptd->ops->resume(ptd);
-}
-
static const struct pci_device_id intel_pch_thermal_id[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1),
- .driver_data = board_hsw, },
+ .driver_data = PCH_BOARD_HSW, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2),
- .driver_data = board_hsw, },
+ .driver_data = PCH_BOARD_HSW, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT),
- .driver_data = board_wpt, },
+ .driver_data = PCH_BOARD_WPT, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL),
- .driver_data = board_skl, },
+ .driver_data = PCH_BOARD_SKL, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H),
- .driver_data = board_skl, },
+ .driver_data = PCH_BOARD_SKL, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL),
- .driver_data = board_cnl, },
+ .driver_data = PCH_BOARD_CNL, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_H),
- .driver_data = board_cnl, },
+ .driver_data = PCH_BOARD_CNL, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_LP),
- .driver_data = board_cnl, },
+ .driver_data = PCH_BOARD_CNL, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CML_H),
- .driver_data = board_cml, },
+ .driver_data = PCH_BOARD_CML, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB),
- .driver_data = board_lwb, },
+ .driver_data = PCH_BOARD_LWB, },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG),
- .driver_data = board_wbg, },
+ .driver_data = PCH_BOARD_WBG, },
{ 0, },
};
MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..c7ba5680cd48 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -2,7 +2,7 @@
/*
* intel_powerclamp.c - package c-state idle injection
*
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012-2023, Intel Corporation.
*
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
@@ -27,23 +27,17 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
-#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
-#include <linux/slab.h>
-#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
-#include <linux/sched/rt.h>
-#include <uapi/linux/sched/types.h>
+#include <linux/idle_inject.h>
-#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
-#include <asm/hardirq.h>
-#define MAX_TARGET_RATIO (50U)
+#define MAX_TARGET_RATIO (100U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
* we increment the confidence counter for the given target ratio.
* CONFIDENCE_OK defines the level where runtime calibration results are
@@ -57,37 +51,30 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
-/* user selected target */
-static unsigned int set_target_ratio;
+/* Idle ratio observed using package C-state counters */
static unsigned int current_ratio;
-static bool should_skip;
-static unsigned int control_cpu; /* The cpu assigned to collect stat and update
- * control parameters. default to BSP but BSP
- * can be offlined.
- */
-static bool clamping;
+/* Skip the idle injection till set to true */
+static bool should_skip;
-struct powerclamp_worker_data {
- struct kthread_worker *worker;
- struct kthread_work balancing_work;
- struct kthread_delayed_work idle_injection_work;
+struct powerclamp_data {
unsigned int cpu;
unsigned int count;
unsigned int guard;
unsigned int window_size_now;
unsigned int target_ratio;
- unsigned int duration_jiffies;
bool clamping;
};
-static struct powerclamp_worker_data __percpu *worker_data;
+static struct powerclamp_data powerclamp_data;
+
static struct thermal_cooling_device *cooling_dev;
-static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
- * clamping kthread worker
- */
+static DEFINE_MUTEX(powerclamp_lock);
+
+/* This duration is in microseconds */
static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;
@@ -104,25 +91,171 @@ static int duration_set(const char *arg, const struct kernel_param *kp)
pr_err("Out of recommended range %lu, between 6-25ms\n",
new_duration);
ret = -EINVAL;
+ goto exit;
}
- duration = clamp(new_duration, 6ul, 25ul);
- smp_mb();
-
+ mutex_lock(&powerclamp_lock);
+ duration = clamp(new_duration, 6ul, 25ul) * 1000;
+ mutex_unlock(&powerclamp_lock);
exit:
return ret;
}
+static int duration_get(char *buf, const struct kernel_param *kp)
+{
+ int ret;
+
+ mutex_lock(&powerclamp_lock);
+ ret = sysfs_emit(buf, "%d\n", duration / 1000);
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
static const struct kernel_param_ops duration_ops = {
.set = duration_set,
- .get = param_get_int,
+ .get = duration_get,
};
-
-module_param_cb(duration, &duration_ops, &duration, 0644);
+module_param_cb(duration, &duration_ops, NULL, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
+#define DEFAULT_MAX_IDLE 50
+#define MAX_ALL_CPU_IDLE 75
+
+static u8 max_idle = DEFAULT_MAX_IDLE;
+
+static cpumask_var_t idle_injection_cpu_mask;
+
+static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask)
+{
+ if (cpumask_available(idle_injection_cpu_mask))
+ goto copy_mask;
+
+ /* This mask is allocated only one time and freed during module exit */
+ if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+copy_mask:
+ cpumask_copy(idle_injection_cpu_mask, copy_mask);
+
+ return 0;
+}
+
+/* Return true if the cpumask and idle percent combination is invalid */
+static bool check_invalid(cpumask_var_t mask, u8 idle)
+{
+ if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE)
+ return true;
+
+ return false;
+}
+
+static int cpumask_set(const char *arg, const struct kernel_param *kp)
+{
+ cpumask_var_t new_mask;
+ int ret;
+
+ mutex_lock(&powerclamp_lock);
+
+ /* Can't set mask when cooling device is in use */
+ if (powerclamp_data.clamping) {
+ ret = -EAGAIN;
+ goto skip_cpumask_set;
+ }
+
+ ret = alloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret)
+ goto skip_cpumask_set;
+
+ ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask),
+ nr_cpumask_bits);
+ if (ret)
+ goto free_cpumask_set;
+
+ if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) {
+ ret = -EINVAL;
+ goto free_cpumask_set;
+ }
+
+ /*
+ * When module parameters are passed from kernel command line
+ * during insmod, the module parameter callback is called
+ * before powerclamp_init(), so we can't assume that some
+ * cpumask can be allocated and copied before here. Also
+ * in this case this cpumask is used as the default mask.
+ */
+ ret = allocate_copy_idle_injection_mask(new_mask);
+
+free_cpumask_set:
+ free_cpumask_var(new_mask);
+skip_cpumask_set:
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+static int cpumask_get(char *buf, const struct kernel_param *kp)
+{
+ if (!cpumask_available(idle_injection_cpu_mask))
+ return -ENODEV;
+
+ return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask),
+ nr_cpumask_bits);
+}
+
+static const struct kernel_param_ops cpumask_ops = {
+ .set = cpumask_set,
+ .get = cpumask_get,
+};
+
+module_param_cb(cpumask, &cpumask_ops, NULL, 0644);
+MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection.");
+
+static int max_idle_set(const char *arg, const struct kernel_param *kp)
+{
+ u8 new_max_idle;
+ int ret = 0;
+
+ mutex_lock(&powerclamp_lock);
+
+ /* Can't set mask when cooling device is in use */
+ if (powerclamp_data.clamping) {
+ ret = -EAGAIN;
+ goto skip_limit_set;
+ }
+
+ ret = kstrtou8(arg, 10, &new_max_idle);
+ if (ret)
+ goto skip_limit_set;
+
+ if (new_max_idle > MAX_TARGET_RATIO) {
+ ret = -EINVAL;
+ goto skip_limit_set;
+ }
+
+ if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
+ ret = -EINVAL;
+ goto skip_limit_set;
+ }
+
+ max_idle = new_max_idle;
+
+skip_limit_set:
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+static const struct kernel_param_ops max_idle_ops = {
+ .set = max_idle_set,
+ .get = param_get_int,
+};
+
+module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
+MODULE_PARM_DESC(max_idle, "maximum injected idle time to the total CPU time ratio in percent range:1-100");
+
struct powerclamp_calibration_data {
unsigned long confidence; /* used for calibration, basically a counter
* gets incremented each time a clamping
@@ -261,6 +394,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -302,7 +438,7 @@ static void adjust_compensation(int target_ratio, unsigned int win)
if (d->confidence >= CONFIDENCE_OK)
return;
- delta = set_target_ratio - current_ratio;
+ delta = powerclamp_data.target_ratio - current_ratio;
/* filter out bad data */
if (delta >= 0 && delta <= (1+target_ratio/10)) {
if (d->steady_comp)
@@ -341,82 +477,39 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
adjust_compensation(target_ratio, win);
/* if we are above target+guard, skip */
- return set_target_ratio + guard <= current_ratio;
+ return powerclamp_data.target_ratio + guard <= current_ratio;
}
-static void clamp_balancing_func(struct kthread_work *work)
+/*
+ * This function calculates runtime from the current target ratio.
+ * This function gets called under powerclamp_lock.
+ */
+static unsigned int get_run_time(void)
{
- struct powerclamp_worker_data *w_data;
- int sleeptime;
- unsigned long target_jiffies;
unsigned int compensated_ratio;
- int interval; /* jiffies to sleep for each attempt */
-
- w_data = container_of(work, struct powerclamp_worker_data,
- balancing_work);
+ unsigned int runtime;
/*
* make sure user selected ratio does not take effect until
* the next round. adjust target_ratio if user has changed
* target such that we can converge quickly.
*/
- w_data->target_ratio = READ_ONCE(set_target_ratio);
- w_data->guard = 1 + w_data->target_ratio / 20;
- w_data->window_size_now = window_size;
- w_data->duration_jiffies = msecs_to_jiffies(duration);
- w_data->count++;
+ powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20;
+ powerclamp_data.window_size_now = window_size;
/*
* systems may have different ability to enter package level
* c-states, thus we need to compensate the injected idle ratio
* to achieve the actual target reported by the HW.
*/
- compensated_ratio = w_data->target_ratio +
- get_compensation(w_data->target_ratio);
+ compensated_ratio = powerclamp_data.target_ratio +
+ get_compensation(powerclamp_data.target_ratio);
if (compensated_ratio <= 0)
compensated_ratio = 1;
- interval = w_data->duration_jiffies * 100 / compensated_ratio;
-
- /* align idle time */
- target_jiffies = roundup(jiffies, interval);
- sleeptime = target_jiffies - jiffies;
- if (sleeptime <= 0)
- sleeptime = 1;
-
- if (clamping && w_data->clamping && cpu_online(w_data->cpu))
- kthread_queue_delayed_work(w_data->worker,
- &w_data->idle_injection_work,
- sleeptime);
-}
-static void clamp_idle_injection_func(struct kthread_work *work)
-{
- struct powerclamp_worker_data *w_data;
-
- w_data = container_of(work, struct powerclamp_worker_data,
- idle_injection_work.work);
-
- /*
- * only elected controlling cpu can collect stats and update
- * control parameters.
- */
- if (w_data->cpu == control_cpu &&
- !(w_data->count % w_data->window_size_now)) {
- should_skip =
- powerclamp_adjust_controls(w_data->target_ratio,
- w_data->guard,
- w_data->window_size_now);
- smp_mb();
- }
+ runtime = duration * 100 / compensated_ratio - duration;
- if (should_skip)
- goto balance;
-
- play_idle(jiffies_to_usecs(w_data->duration_jiffies));
-
-balance:
- if (clamping && w_data->clamping && cpu_online(w_data->cpu))
- kthread_queue_work(w_data->worker, &w_data->balancing_work);
+ return runtime;
}
/*
@@ -452,126 +545,129 @@ static void poll_pkg_cstate(struct work_struct *dummy)
msr_last = msr_now;
tsc_last = tsc_now;
- if (true == clamping)
+ mutex_lock(&powerclamp_lock);
+ if (powerclamp_data.clamping)
schedule_delayed_work(&poll_pkg_cstate_work, HZ);
+ mutex_unlock(&powerclamp_lock);
}
-static void start_power_clamp_worker(unsigned long cpu)
+static struct idle_inject_device *ii_dev;
+
+/*
+ * This function is called from idle injection core on timer expiry
+ * for the run duration. This allows powerclamp to readjust or skip
+ * injecting idle for this cycle.
+ */
+static bool idle_inject_update(void)
{
- struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
- struct kthread_worker *worker;
+ bool update = false;
- worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
- if (IS_ERR(worker))
- return;
+ /* We can't sleep in this callback */
+ if (!mutex_trylock(&powerclamp_lock))
+ return true;
- w_data->worker = worker;
- w_data->count = 0;
- w_data->cpu = cpu;
- w_data->clamping = true;
- set_bit(cpu, cpu_clamping_mask);
- sched_set_fifo(worker->task);
- kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
- kthread_init_delayed_work(&w_data->idle_injection_work,
- clamp_idle_injection_func);
- kthread_queue_work(w_data->worker, &w_data->balancing_work);
-}
+ if (!(powerclamp_data.count % powerclamp_data.window_size_now)) {
-static void stop_power_clamp_worker(unsigned long cpu)
-{
- struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+ should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio,
+ powerclamp_data.guard,
+ powerclamp_data.window_size_now);
+ update = true;
+ }
- if (!w_data->worker)
- return;
+ if (update) {
+ unsigned int runtime = get_run_time();
- w_data->clamping = false;
- /*
- * Make sure that all works that get queued after this point see
- * the clamping disabled. The counter part is not needed because
- * there is an implicit memory barrier when the queued work
- * is proceed.
- */
- smp_wmb();
- kthread_cancel_work_sync(&w_data->balancing_work);
- kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
- /*
- * The balancing work still might be queued here because
- * the handling of the "clapming" variable, cancel, and queue
- * operations are not synchronized via a lock. But it is not
- * a big deal. The balancing work is fast and destroy kthread
- * will wait for it.
- */
- clear_bit(w_data->cpu, cpu_clamping_mask);
- kthread_destroy_worker(w_data->worker);
+ idle_inject_set_duration(ii_dev, runtime, duration);
+ }
+
+ powerclamp_data.count++;
+
+ mutex_unlock(&powerclamp_lock);
+
+ if (should_skip)
+ return false;
- w_data->worker = NULL;
+ return true;
}
-static int start_power_clamp(void)
+/* This function starts idle injection by calling idle_inject_start() */
+static void trigger_idle_injection(void)
{
- unsigned long cpu;
-
- set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
- /* prevent cpu hotplug */
- cpus_read_lock();
+ unsigned int runtime = get_run_time();
- /* prefer BSP */
- control_cpu = cpumask_first(cpu_online_mask);
+ idle_inject_set_duration(ii_dev, runtime, duration);
+ idle_inject_start(ii_dev);
+ powerclamp_data.clamping = true;
+}
- clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+/*
+ * This function is called from start_power_clamp() to register
+ * CPUs with the powercap idle injection core and set the default
+ * idle duration and latency.
+ */
+static int powerclamp_idle_injection_register(void)
+{
+ poll_pkg_cstate_enable = false;
+ if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
+ ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+ } else {
+ ii_dev = idle_inject_register(idle_injection_cpu_mask);
+ }
- /* start one kthread worker per online cpu */
- for_each_online_cpu(cpu) {
- start_power_clamp_worker(cpu);
+ if (!ii_dev) {
+ pr_err("powerclamp: idle_inject_register failed\n");
+ return -EAGAIN;
}
- cpus_read_unlock();
+
+ idle_inject_set_duration(ii_dev, TICK_USEC, duration);
+ idle_inject_set_latency(ii_dev, UINT_MAX);
return 0;
}
-static void end_power_clamp(void)
+/*
+ * This function is called from end_power_clamp() to stop idle injection.
+ * The caller then unregisters the CPUs from the powercap idle injection core.
+ */
+static void remove_idle_injection(void)
{
- int i;
+ if (!powerclamp_data.clamping)
+ return;
- /*
- * Block requeuing in all the kthread workers. They will flush and
- * stop faster.
- */
- clamping = false;
- for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
- pr_debug("clamping worker for cpu %d alive, destroy\n", i);
- stop_power_clamp_worker(i);
- }
+ powerclamp_data.clamping = false;
+ idle_inject_stop(ii_dev);
}
-static int powerclamp_cpu_online(unsigned int cpu)
+/*
+ * This function is called when the user changes the cooling device
+ * state from zero to some other value.
+ */
+static int start_power_clamp(void)
{
- if (clamping == false)
- return 0;
- start_power_clamp_worker(cpu);
- /* prefer BSP as controlling CPU */
- if (cpu == 0) {
- control_cpu = 0;
- smp_mb();
+ int ret;
+
+ ret = powerclamp_idle_injection_register();
+ if (!ret) {
+ trigger_idle_injection();
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
}
- return 0;
+
+ return ret;
}
-static int powerclamp_cpu_predown(unsigned int cpu)
+/*
+ * This function is called when the user changes the cooling device
+ * state from a non-zero value to zero.
+ */
+static void end_power_clamp(void)
{
- if (clamping == false)
- return 0;
-
- stop_power_clamp_worker(cpu);
- if (cpu != control_cpu)
- return 0;
-
- control_cpu = cpumask_first(cpu_online_mask);
- if (control_cpu == cpu)
- control_cpu = cpumask_next(cpu, cpu_online_mask);
- smp_mb();
- return 0;
+ if (powerclamp_data.clamping) {
+ remove_idle_injection();
+ idle_inject_unregister(ii_dev);
+ }
}
static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
@@ -585,11 +681,9 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
- /* to save power, do not poll idle ratio while not clamping */
- *state = -1; /* indicates invalid state */
+ mutex_lock(&powerclamp_lock);
+ *state = powerclamp_data.target_ratio;
+ mutex_unlock(&powerclamp_lock);
return 0;
}
@@ -599,24 +693,32 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
{
int ret = 0;
+ mutex_lock(&powerclamp_lock);
+
new_target_ratio = clamp(new_target_ratio, 0UL,
- (unsigned long) (MAX_TARGET_RATIO-1));
- if (set_target_ratio == 0 && new_target_ratio > 0) {
+ (unsigned long) (max_idle - 1));
+ if (!powerclamp_data.target_ratio && new_target_ratio > 0) {
pr_info("Start idle injection to reduce power\n");
- set_target_ratio = new_target_ratio;
+ powerclamp_data.target_ratio = new_target_ratio;
ret = start_power_clamp();
+ if (ret)
+ powerclamp_data.target_ratio = 0;
goto exit_set;
- } else if (set_target_ratio > 0 && new_target_ratio == 0) {
+ } else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) {
pr_info("Stop forced idle injection\n");
end_power_clamp();
- set_target_ratio = 0;
+ powerclamp_data.target_ratio = 0;
} else /* adjust currently running */ {
- set_target_ratio = new_target_ratio;
- /* make new set_target_ratio visible to other cpus */
- smp_mb();
+ unsigned int runtime;
+
+ powerclamp_data.target_ratio = new_target_ratio;
+ runtime = get_run_time();
+ idle_inject_set_duration(ii_dev, runtime, duration);
}
exit_set:
+ mutex_unlock(&powerclamp_lock);
+
return ret;
}
@@ -657,7 +759,6 @@ static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
int i = 0;
- seq_printf(m, "controlling cpu: %d\n", control_cpu);
seq_printf(m, "pct confidence steady dynamic (compensation)\n");
for (i = 0; i < MAX_TARGET_RATIO; i++) {
seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
@@ -680,75 +781,57 @@ static inline void powerclamp_create_debug_files(void)
&powerclamp_debug_fops);
}
-static enum cpuhp_state hp_state;
-
static int __init powerclamp_init(void)
{
int retval;
- cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL);
- if (!cpu_clamping_mask)
- return -ENOMEM;
-
/* probe cpu features and ids here */
retval = powerclamp_probe();
if (retval)
- goto exit_free;
+ return retval;
+
+ mutex_lock(&powerclamp_lock);
+ retval = allocate_copy_idle_injection_mask(cpu_present_mask);
+ mutex_unlock(&powerclamp_lock);
+
+ if (retval)
+ return retval;
/* set default limit, maybe adjusted during runtime based on feedback */
window_size = 2;
- retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "thermal/intel_powerclamp:online",
- powerclamp_cpu_online,
- powerclamp_cpu_predown);
- if (retval < 0)
- goto exit_free;
-
- hp_state = retval;
-
- worker_data = alloc_percpu(struct powerclamp_worker_data);
- if (!worker_data) {
- retval = -ENOMEM;
- goto exit_unregister;
- }
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
- &powerclamp_cooling_ops);
- if (IS_ERR(cooling_dev)) {
- retval = -ENODEV;
- goto exit_free_thread;
- }
+ &powerclamp_cooling_ops);
+ if (IS_ERR(cooling_dev))
+ return -ENODEV;
if (!duration)
- duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
+ duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES);
powerclamp_create_debug_files();
return 0;
-
-exit_free_thread:
- free_percpu(worker_data);
-exit_unregister:
- cpuhp_remove_state_nocalls(hp_state);
-exit_free:
- bitmap_free(cpu_clamping_mask);
- return retval;
}
module_init(powerclamp_init);
static void __exit powerclamp_exit(void)
{
+ mutex_lock(&powerclamp_lock);
end_power_clamp();
- cpuhp_remove_state_nocalls(hp_state);
- free_percpu(worker_data);
+ mutex_unlock(&powerclamp_lock);
+
thermal_cooling_device_unregister(cooling_dev);
- bitmap_free(cpu_clamping_mask);
cancel_delayed_work_sync(&poll_pkg_cstate_work);
debugfs_remove_recursive(debug_dir);
+
+ if (cpumask_available(idle_injection_cpu_mask))
+ free_cpumask_var(idle_injection_cpu_mask);
}
module_exit(powerclamp_exit);
+MODULE_IMPORT_NS(IDLE_INJECT);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c
index 3eafc6b0e6c3..97b843fa7568 100644
--- a/drivers/thermal/intel/intel_quark_dts_thermal.c
+++ b/drivers/thermal/intel/intel_quark_dts_thermal.c
@@ -84,6 +84,7 @@
#define QRK_DTS_MASK_TP_THRES 0xFF
#define QRK_DTS_SHIFT_TP 8
#define QRK_DTS_ID_TP_CRITICAL 0
+#define QRK_DTS_ID_TP_HOT 1
#define QRK_DTS_SAFE_TP_THRES 105
/* Thermal Sensor Register Lock */
@@ -104,6 +105,7 @@ struct soc_sensor_entry {
u32 store_ptps;
u32 store_dts_enable;
struct thermal_zone_device *tzone;
+ struct thermal_trip trips[QRK_MAX_DTS_TRIPS];
};
static struct soc_sensor_entry *soc_dts;
@@ -172,9 +174,9 @@ static int soc_dts_disable(struct thermal_zone_device *tzd)
return ret;
}
-static int _get_trip_temp(int trip, int *temp)
+static int get_trip_temp(int trip)
{
- int status;
+ int status, temp;
u32 out;
mutex_lock(&dts_update_mutex);
@@ -183,7 +185,7 @@ static int _get_trip_temp(int trip, int *temp)
mutex_unlock(&dts_update_mutex);
if (status)
- return status;
+ return THERMAL_TEMP_INVALID;
/*
* Thermal Sensor Programmable Trip Point Register has 8-bit
@@ -191,21 +193,10 @@ static int _get_trip_temp(int trip, int *temp)
* thresholds. The threshold value is always offset by its
* temperature base (50 degree Celsius).
*/
- *temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES;
- *temp -= QRK_DTS_TEMP_BASE;
+ temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES;
+ temp -= QRK_DTS_TEMP_BASE;
- return 0;
-}
-
-static inline int sys_get_trip_temp(struct thermal_zone_device *tzd,
- int trip, int *temp)
-{
- return _get_trip_temp(trip, temp);
-}
-
-static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp)
-{
- return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp);
+ return temp;
}
static int update_trip_temp(struct soc_sensor_entry *aux_entry,
@@ -262,17 +253,6 @@ static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
return update_trip_temp(tzd->devdata, trip, temp);
}
-static int sys_get_trip_type(struct thermal_zone_device *thermal,
- int trip, enum thermal_trip_type *type)
-{
- if (trip)
- *type = THERMAL_TRIP_HOT;
- else
- *type = THERMAL_TRIP_CRITICAL;
-
- return 0;
-}
-
static int sys_get_curr_temp(struct thermal_zone_device *tzd,
int *temp)
{
@@ -315,10 +295,7 @@ static int sys_change_mode(struct thermal_zone_device *tzd,
static struct thermal_zone_device_ops tzone_ops = {
.get_temp = sys_get_curr_temp,
- .get_trip_temp = sys_get_trip_temp,
- .get_trip_type = sys_get_trip_type,
.set_trip_temp = sys_set_trip_temp,
- .get_crit_temp = sys_get_crit_temp,
.change_mode = sys_change_mode,
};
@@ -385,10 +362,18 @@ static struct soc_sensor_entry *alloc_soc_dts(void)
goto err_ret;
}
- aux_entry->tzone = thermal_zone_device_register("quark_dts",
- QRK_MAX_DTS_TRIPS,
- wr_mask,
- aux_entry, &tzone_ops, NULL, 0, polling_delay);
+ aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = get_trip_temp(QRK_DTS_ID_TP_CRITICAL);
+ aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
+
+ aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = get_trip_temp(QRK_DTS_ID_TP_HOT);
+ aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT;
+
+ aux_entry->tzone = thermal_zone_device_register_with_trips("quark_dts",
+ aux_entry->trips,
+ QRK_MAX_DTS_TRIPS,
+ wr_mask,
+ aux_entry, &tzone_ops,
+ NULL, 0, polling_delay);
if (IS_ERR(aux_entry->tzone)) {
err = PTR_ERR(aux_entry->tzone);
goto err_ret;
diff --git a/drivers/thermal/thermal_acpi.c b/drivers/thermal/thermal_acpi.c
index 671f774a7621..0e5698818f69 100644
--- a/drivers/thermal/thermal_acpi.c
+++ b/drivers/thermal/thermal_acpi.c
@@ -21,42 +21,11 @@
#define TEMP_MIN_DECIK 2180
#define TEMP_MAX_DECIK 4480
-static int thermal_acpi_trip_init(struct acpi_device *adev,
- enum thermal_trip_type type, int id,
- struct thermal_trip *trip)
+static int thermal_acpi_trip_temp(struct acpi_device *adev, char *obj_name,
+ int *ret_temp)
{
unsigned long long temp;
acpi_status status;
- char obj_name[5];
-
- switch (type) {
- case THERMAL_TRIP_ACTIVE:
- if (id < 0 || id > 9)
- return -EINVAL;
-
- obj_name[1] = 'A';
- obj_name[2] = 'C';
- obj_name[3] = '0' + id;
- break;
- case THERMAL_TRIP_PASSIVE:
- obj_name[1] = 'P';
- obj_name[2] = 'S';
- obj_name[3] = 'V';
- break;
- case THERMAL_TRIP_HOT:
- obj_name[1] = 'H';
- obj_name[2] = 'O';
- obj_name[3] = 'T';
- break;
- case THERMAL_TRIP_CRITICAL:
- obj_name[1] = 'C';
- obj_name[2] = 'R';
- obj_name[3] = 'T';
- break;
- }
-
- obj_name[0] = '_';
- obj_name[4] = '\0';
status = acpi_evaluate_integer(adev->handle, obj_name, NULL, &temp);
if (ACPI_FAILURE(status)) {
@@ -65,87 +34,84 @@ static int thermal_acpi_trip_init(struct acpi_device *adev,
}
if (temp >= TEMP_MIN_DECIK && temp <= TEMP_MAX_DECIK) {
- trip->temperature = deci_kelvin_to_millicelsius(temp);
+ *ret_temp = deci_kelvin_to_millicelsius(temp);
} else {
acpi_handle_debug(adev->handle, "%s result %llu out of range\n",
obj_name, temp);
- trip->temperature = THERMAL_TEMP_INVALID;
+ *ret_temp = THERMAL_TEMP_INVALID;
}
- trip->hysteresis = 0;
- trip->type = type;
-
return 0;
}
/**
- * thermal_acpi_trip_active - Get the specified active trip point
- * @adev: Thermal zone ACPI device object to get the description from.
+ * thermal_acpi_active_trip_temp - Retrieve active trip point temperature
+ * @adev: Target thermal zone ACPI device object.
* @id: Active cooling level (0 - 9).
- * @trip: Trip point structure to be populated on success.
+ * @ret_temp: Address to store the retrieved temperature value on success.
*
* Evaluate the _ACx object for the thermal zone represented by @adev to obtain
* the temperature of the active cooling trip point corresponding to the active
- * cooling level given by @id and initialize @trip as an active trip point using
- * that temperature value.
+ * cooling level given by @id.
*
* Return 0 on success or a negative error value on failure.
*/
-int thermal_acpi_trip_active(struct acpi_device *adev, int id,
- struct thermal_trip *trip)
+int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp)
{
- return thermal_acpi_trip_init(adev, THERMAL_TRIP_ACTIVE, id, trip);
+ char obj_name[] = {'_', 'A', 'C', '0' + id, '\0'};
+
+ if (id < 0 || id > 9)
+ return -EINVAL;
+
+ return thermal_acpi_trip_temp(adev, obj_name, ret_temp);
}
-EXPORT_SYMBOL_GPL(thermal_acpi_trip_active);
+EXPORT_SYMBOL_GPL(thermal_acpi_active_trip_temp);
/**
- * thermal_acpi_trip_passive - Get the passive trip point
- * @adev: Thermal zone ACPI device object to get the description from.
- * @trip: Trip point structure to be populated on success.
+ * thermal_acpi_passive_trip_temp - Retrieve passive trip point temperature
+ * @adev: Target thermal zone ACPI device object.
+ * @ret_temp: Address to store the retrieved temperature value on success.
*
* Evaluate the _PSV object for the thermal zone represented by @adev to obtain
- * the temperature of the passive cooling trip point and initialize @trip as a
- * passive trip point using that temperature value.
+ * the temperature of the passive cooling trip point.
*
* Return 0 on success or -ENODATA on failure.
*/
-int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip)
+int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp)
{
- return thermal_acpi_trip_init(adev, THERMAL_TRIP_PASSIVE, INT_MAX, trip);
+ return thermal_acpi_trip_temp(adev, "_PSV", ret_temp);
}
-EXPORT_SYMBOL_GPL(thermal_acpi_trip_passive);
+EXPORT_SYMBOL_GPL(thermal_acpi_passive_trip_temp);
/**
- * thermal_acpi_trip_hot - Get the near critical trip point
- * @adev: the ACPI device to get the description from.
- * @trip: a &struct thermal_trip to be filled if the function succeed.
+ * thermal_acpi_hot_trip_temp - Retrieve hot trip point temperature
+ * @adev: Target thermal zone ACPI device object.
+ * @ret_temp: Address to store the retrieved temperature value on success.
*
* Evaluate the _HOT object for the thermal zone represented by @adev to obtain
* the temperature of the trip point at which the system is expected to be put
- * into the S4 sleep state and initialize @trip as a hot trip point using that
- * temperature value.
+ * into the S4 sleep state.
*
* Return 0 on success or -ENODATA on failure.
*/
-int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip)
+int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp)
{
- return thermal_acpi_trip_init(adev, THERMAL_TRIP_HOT, INT_MAX, trip);
+ return thermal_acpi_trip_temp(adev, "_HOT", ret_temp);
}
-EXPORT_SYMBOL_GPL(thermal_acpi_trip_hot);
+EXPORT_SYMBOL_GPL(thermal_acpi_hot_trip_temp);
/**
- * thermal_acpi_trip_critical - Get the critical trip point
- * @adev: the ACPI device to get the description from.
- * @trip: a &struct thermal_trip to be filled if the function succeed.
+ * thermal_acpi_critical_trip_temp - Retrieve critical trip point temperature
+ * @adev: Target thermal zone ACPI device object.
+ * @ret_temp: Address to store the retrieved temperature value on success.
*
* Evaluate the _CRT object for the thermal zone represented by @adev to obtain
- * the temperature of the critical cooling trip point and initialize @trip as a
- * critical trip point using that temperature value.
+ * the temperature of the critical cooling trip point.
*
* Return 0 on success or -ENODATA on failure.
*/
-int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip)
+int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp)
{
- return thermal_acpi_trip_init(adev, THERMAL_TRIP_CRITICAL, INT_MAX, trip);
+ return thermal_acpi_trip_temp(adev, "_CRT", ret_temp);
}
-EXPORT_SYMBOL_GPL(thermal_acpi_trip_critical);
+EXPORT_SYMBOL_GPL(thermal_acpi_critical_trip_temp);