From d216bfb4d7984a510e984d2c94ae6d2251d76aee Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Thu, 2 Sep 2021 23:57:29 +0300 Subject: PM: sleep: wakeirq: drop useless parameter from dev_pm_attach_wake_irq() This function has the 'irq' parameter which isn't ever used, so drop it. Signed-off-by: Sergey Shtylyov [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 3bad3266a2ad..b91a3a9bf9f6 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -12,14 +12,11 @@ /** * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ * @dev: Device entry - * @irq: Device wake-up capable interrupt * @wirq: Wake irq specific data * - * Internal function to attach either a device IO interrupt or a - * dedicated wake-up interrupt as a wake IRQ. + * Internal function to attach a dedicated wake-up interrupt as a wake IRQ. */ -static int dev_pm_attach_wake_irq(struct device *dev, int irq, - struct wake_irq *wirq) +static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq) { unsigned long flags; @@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq) wirq->dev = dev; wirq->irq = irq; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) kfree(wirq); @@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_name; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) goto err_free_irq; -- cgit v1.2.3 From 0654cf05d17bc4d296a53a8bc7d107bc8a795f2e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Sep 2021 15:51:45 +0200 Subject: ACPI: CPPC: Introduce cppc_get_nominal_perf() On some systems the nominal_perf value retrieved via CPPC is just a constant and fetching it doesn't require accessing any registers, so if it is the only CPPC capability that's needed, it is wasteful to run cppc_get_perf_caps() in order to get just that value alone, especially when this is done for CPUs other than the one running the code. For this reason, introduce cppc_get_nominal_perf() allowing nominal_perf to be obtained individually, by generalizing the existing cppc_get_desired_perf() (and renaming it) so it can be used to retrieve any specific CPPC capability value. While at it, clean up the cppc_get_desired_perf() kerneldoc comment a bit. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 47 +++++++++++++++++++++++++++++++---------------- include/acpi/cppc_acpi.h | 5 +++++ 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a4d4eebba1da..bd482108310c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return ret_val; } -/** - * cppc_get_desired_perf - Get the value of desired performance register. - * @cpunum: CPU from which to get desired performance. - * @desired_perf: address of a variable to store the returned desired performance - * - * Return: 0 for success, -EIO otherwise. - */ -int cppc_get_desired_perf(int cpunum, u64 *desired_perf) +static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); - int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); - struct cpc_register_resource *desired_reg; - struct cppc_pcc_data *pcc_ss_data = NULL; - - desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx]; - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(reg)) { + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); + struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0; if (pcc_ss_id < 0) @@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) down_write(&pcc_ss_data->pcc_lock); if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) - cpc_read(cpunum, desired_reg, desired_perf); + cpc_read(cpunum, reg, perf); else ret = -EIO; @@ -1044,12 +1035,36 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) return ret; } - cpc_read(cpunum, desired_reg, desired_perf); + cpc_read(cpunum, reg, perf); return 0; } + +/** + * cppc_get_desired_perf - Get the desired performance register value. + * @cpunum: CPU from which to get desired performance. + * @desired_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_desired_perf(int cpunum, u64 *desired_perf) +{ + return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf); +} EXPORT_SYMBOL_GPL(cppc_get_desired_perf); +/** + * cppc_get_nominal_perf - Get the nominal performance register value. + * @cpunum: CPU from which to get nominal performance. + * @nominal_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) +{ + return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); +} + /** * cppc_get_perf_caps - Get a CPU's performance capabilities. * @cpunum: CPU from which to get capabilities info. diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 9f4985b4d64d..bc159a9b4a73 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -135,6 +135,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); +extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { return -ENOTSUPP; } +static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; -- cgit v1.2.3 From 46573fd6369f098f15e11768850b6430f374905f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Sep 2021 15:53:39 +0200 Subject: cpufreq: intel_pstate: hybrid: Rework HWP calibration The current HWP calibration for hybrid processors in intel_pstate is fragile, because it depends too much on the information provided by the platform firmware via CPPC which may not be reliable enough. It also need not be so complicated. In order to improve that mechanism and make it more resistant to platform firmware issues, make it only use the CPPC nominal_perf values to compute the HWP-to-frequency scaling factors for all CPUs and possibly use the HWP_CAP highest_perf values to recompute them if the ones derived from the CPPC nominal_perf values alone appear to be too high. Namely, fetch CPC.nominal_perf for all CPUs present in the system, find the minimum one and use it as a reference for computing all of the CPUs' scaling factors (using the observation that for the CPUs having the minimum CPC.nominal_perf the HWP range of available performance levels should be the same as the range of available "legacy" P-states and so the HWP-to-frequency scaling factor for them should be the same as the corresponding scaling factor used for representing the P-state values in kHz). Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui --- drivers/cpufreq/intel_pstate.c | 185 ++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 114 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2d83a9f9651b..1097f826ad70 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data; * @get_min: Callback to get minimum P state * @get_turbo: Callback to get turbo P state * @get_scaling: Callback to get frequency scaling factor + * @get_cpu_scaling: Get frequency scaling factor for a given cpu * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms @@ -281,6 +282,7 @@ struct pstate_funcs { int (*get_min)(void); int (*get_turbo)(void); int (*get_scaling)(void); + int (*get_cpu_scaling)(int cpu); int (*get_aperf_mperf_shift)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); @@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) return cppc_perf.nominal_perf; } +static u32 intel_pstate_cppc_nominal(int cpu) +{ + u64 nominal_perf; + + if (cppc_get_nominal_perf(cpu, &nominal_perf)) + return 0; + + return nominal_perf; +} #else /* CONFIG_ACPI_CPPC_LIB */ static inline void intel_pstate_set_itmt_prio(int cpu) { @@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) acpi_processor_unregister_performance(policy->cpu); } - -static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps) -{ - return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf; -} - -static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu, - struct cppc_perf_caps *caps) -{ - if (cppc_get_perf_caps(cpu->cpu, caps)) - return false; - - return caps->highest_perf && caps->lowest_perf <= caps->highest_perf; -} #else /* CONFIG_ACPI */ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { @@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ -static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu) -{ - pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu); - - cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling; -} - /** - * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels. + * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. * * On hybrid processors, HWP may expose more performance levels than there are @@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu) * scaling factor between HWP performance levels and CPU frequency will be less * than the scaling factor between P-state values and CPU frequency. * - * In that case, the scaling factor between HWP performance levels and CPU - * frequency needs to be determined which can be done with the help of the - * observation that certain HWP performance levels should correspond to certain - * P-states, like for example the HWP highest performance should correspond - * to the maximum turbo P-state of the CPU. + * In that case, adjust the CPU parameters used in computations accordingly. */ -static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu) +static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) { int perf_ctl_max_phys = cpu->pstate.max_pstate_physical; int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(); int turbo_freq = perf_ctl_turbo * perf_ctl_scaling; - int perf_ctl_max = pstate_funcs.get_max(); - int max_freq = perf_ctl_max * perf_ctl_scaling; - int scaling = INT_MAX; - int freq; + int scaling = cpu->pstate.scaling; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); - pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max); + pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max()); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling); - pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate); pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate); - -#ifdef CONFIG_ACPI - if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) { - struct cppc_perf_caps caps; - - if (intel_pstate_cppc_perf_caps(cpu, &caps)) { - if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) { - pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu); - - /* - * If the CPPC nominal performance is valid, it - * can be assumed to correspond to cpu_khz. - */ - if (caps.nominal_perf == perf_ctl_max_phys) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf); - } else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) { - pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu); - - /* - * If the CPPC guaranteed performance is valid, - * it can be assumed to correspond to max_freq. - */ - if (caps.guaranteed_perf == perf_ctl_max) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf); - } - } - } -#endif - /* - * If using the CPPC data to compute the HWP-to-frequency scaling factor - * doesn't work, use the HWP_CAP gauranteed perf for this purpose with - * the assumption that it corresponds to max_freq. - */ - if (scaling > perf_ctl_scaling) { - pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu); - - if (cpu->pstate.max_pstate == perf_ctl_max) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate); - if (scaling > perf_ctl_scaling) { - /* - * This should not happen, because it would mean that - * the number of HWP perf levels was less than the - * number of P-states, so use the PERF_CTL scaling in - * that case. - */ - pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu, - scaling); - - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - } + pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); /* - * If the product of the HWP performance scaling factor obtained above - * and the HWP_CAP highest performance is greater than the maximum turbo - * frequency corresponding to the pstate_funcs.get_turbo() return value, - * the scaling factor is too high, so recompute it so that the HWP_CAP - * highest performance corresponds to the maximum turbo frequency. + * If the product of the HWP performance scaling factor and the HWP_CAP + * highest performance is greater than the maximum turbo frequency + * corresponding to the pstate_funcs.get_turbo() return value, the + * scaling factor is too high, so recompute it to make the HWP_CAP + * highest performance correspond to the maximum turbo frequency. */ if (turbo_freq < cpu->pstate.turbo_pstate * scaling) { - pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling); - cpu->pstate.turbo_freq = turbo_freq; scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); - } + cpu->pstate.scaling = scaling; - cpu->pstate.scaling = scaling; - - pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); + pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n", + cpu->cpu, scaling); + } cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - freq = perf_ctl_max_phys * perf_ctl_scaling; - cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling); + cpu->pstate.max_pstate_physical = + DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, + scaling); cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; /* @@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void) return ret; } +#ifdef CONFIG_ACPI_CPPC_LIB +static u32 hybrid_ref_perf; + +static int hybrid_get_cpu_scaling(int cpu) +{ + return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf, + intel_pstate_cppc_nominal(cpu)); +} + +static void intel_pstate_cppc_set_cpu_scaling(void) +{ + u32 min_nominal_perf = U32_MAX; + int cpu; + + for_each_present_cpu(cpu) { + u32 nominal_perf = intel_pstate_cppc_nominal(cpu); + + if (nominal_perf && nominal_perf < min_nominal_perf) + min_nominal_perf = nominal_perf; + } + + if (min_nominal_perf < U32_MAX) { + hybrid_ref_perf = min_nominal_perf; + pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; + } +} +#else +static inline void intel_pstate_cppc_set_cpu_scaling(void) +{ +} +#endif /* CONFIG_ACPI_CPPC_LIB */ + static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); @@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { - bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU); int perf_ctl_max_phys = pstate_funcs.get_max_physical(); - int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys : - pstate_funcs.get_scaling(); + int perf_ctl_scaling = pstate_funcs.get_scaling(); cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate_physical = perf_ctl_max_phys; @@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (hwp_active && !hwp_mode_bdw) { __intel_pstate_get_hwp_cap(cpu); - if (hybrid_cpu) - intel_pstate_hybrid_hwp_calibrate(cpu); - else + if (pstate_funcs.get_cpu_scaling) { + cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); + if (cpu->pstate.scaling != perf_ctl_scaling) + intel_pstate_hybrid_hwp_adjust(cpu); + } else { cpu->pstate.scaling = perf_ctl_scaling; + } } else { cpu->pstate.scaling = perf_ctl_scaling; cpu->pstate.max_pstate = pstate_funcs.get_max(); @@ -3276,6 +3230,9 @@ static int __init intel_pstate_init(void) if (!default_driver) default_driver = &intel_pstate; + if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) + intel_pstate_cppc_set_cpu_scaling(); + goto hwp_cpu_matched; } } else { -- cgit v1.2.3 From ca67408ad57a5a67ad6801d792c40c010451bdef Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 6 Sep 2021 09:44:52 +0100 Subject: PM: EM: fix kernel-doc comments Fix the kernel-doc comments for the improved Energy Model documentation. Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 1834752c5617..39dcadd492b5 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -11,7 +11,7 @@ #include /** - * em_perf_state - Performance state of a performance domain + * struct em_perf_state - Performance state of a performance domain * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -25,7 +25,7 @@ struct em_perf_state { }; /** - * em_perf_domain - Performance domain + * struct em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states * @milliwatts: Flag indicating the power values are in milli-Watts @@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev); /** * em_cpu_energy() - Estimates the energy consumed by the CPUs of a - performance domain + * performance domain * @pd : performance domain for which energy has to be estimated * @max_util : highest utilization among CPUs of the domain * @sum_util : sum of the utilization of all CPUs in the domain * @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which - might reflect reduced frequency (due to thermal) + * might reflect reduced frequency (due to thermal) * * This function must be used only for CPU devices. There is no validation, * i.e. if the EM is a CPU type and has cpumask allocated. It is called from -- cgit v1.2.3 From d62aab8ff711dc3a4c07684ea33042347f79b630 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 6 Sep 2021 09:44:53 +0100 Subject: Documentation: power: include kernel-doc in Energy Model doc Improve the existing documentation of the Energy Model and add kernel-doc comments. This extends an API description and helps better understanding the usage. Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- Documentation/power/energy-model.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 60ac091d3b0d..8a2788afe89b 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use the same scale. If there are different scales, these subsystems might decide to: return warning/error, stop working or panic. See Section 3. for an example of driver implementing this -callback, and kernel/power/energy_model.c for further documentation on this -API. +callback, or Section 2.4 for further documentation on this API 2.3 Accessing performance domains @@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil CPUfreq governor is in use in case of CPU device. Currently this calculation is not provided for other type of devices. -More details about the above APIs can be found in include/linux/energy_model.h. +More details about the above APIs can be found in ```` +or in Section 2.4 + + +2.4 Description details of this API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. kernel-doc:: include/linux/energy_model.h + :internal: + +.. kernel-doc:: kernel/power/energy_model.c + :export: 3. Example driver -- cgit v1.2.3 From 4a9344cd0aa4499beb3772bbecb40bb78888c0e1 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Tue, 7 Sep 2021 04:24:23 -0700 Subject: PM: sleep: core: Avoid setting power.must_resume to false There are variables(power.may_skip_resume and dev->power.must_resume) and DPM_FLAG_MAY_SKIP_RESUME flags to control the resume of devices after a system wide suspend transition. Setting the DPM_FLAG_MAY_SKIP_RESUME flag means that the driver allows its "noirq" and "early" resume callbacks to be skipped if the device can be left in suspend after a system-wide transition into the working state. PM core determines that the driver's "noirq" and "early" resume callbacks should be skipped or not with dev_pm_skip_resume() function by checking power.may_skip_resume variable. power.must_resume variable is getting set to false in __device_suspend() function without checking device's DPM_FLAG_MAY_SKIP_RESUME settings. In problematic scenario, where all the devices in the suspend_late stage are successful and some device can fail to suspend in suspend_noirq phase. So some devices successfully suspended in suspend_late stage are not getting chance to execute __device_suspend_noirq() to set dev->power.must_resume variable to true and not getting resumed in early_resume phase. Add a check for device's DPM_FLAG_MAY_SKIP_RESUME flag before setting power.must_resume variable in __device_suspend function. Fixes: 6e176bf8d461 ("PM: sleep: core: Do not skip callbacks in the resume phase") Signed-off-by: Prasad Sodagudi Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d568772152c2..cbea78e79f3d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } dev->power.may_skip_resume = true; - dev->power.must_resume = false; + dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); -- cgit v1.2.3