1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Intel Performance and Energy Bias Hint support. |
4 | * |
5 | * Copyright (C) 2019 Intel Corporation |
6 | * |
7 | * Author: |
8 | * Rafael J. Wysocki <rafael.j.wysocki@intel.com> |
9 | */ |
10 | |
11 | #include <linux/cpuhotplug.h> |
12 | #include <linux/cpu.h> |
13 | #include <linux/device.h> |
14 | #include <linux/kernel.h> |
15 | #include <linux/string.h> |
16 | #include <linux/syscore_ops.h> |
17 | #include <linux/pm.h> |
18 | |
19 | #include <asm/cpu_device_id.h> |
20 | #include <asm/cpufeature.h> |
21 | #include <asm/msr.h> |
22 | |
/**
 * DOC: overview
 *
 * The Performance and Energy Bias Hint (EPB) allows software to specify its
 * preference with respect to the power-performance tradeoffs present in the
 * processor.  Generally, the EPB is expected to be set by user space (directly
 * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
 * two reasons for the kernel to update it.
 *
 * First, there are systems where the platform firmware resets the EPB during
 * system-wide transitions from sleep states back into the working state,
 * effectively causing the previous EPB updates by user space to be lost.
 * Thus the kernel needs to save the current EPB values for all CPUs during
 * system-wide transitions to sleep states and restore them on the way back to
 * the working state.  That can be achieved by saving EPB for secondary CPUs
 * when they are taken offline during transitions into system sleep states and
 * for the boot CPU in a syscore suspend operation, so that it can be restored
 * for the boot CPU in a syscore resume operation and for the other CPUs when
 * they are brought back online.  However, CPUs that are already offline when
 * a system-wide PM transition is started are not taken offline again, but their
 * EPB values may still be reset by the platform firmware during the transition,
 * so in fact it is necessary to save the EPB of any CPU taken offline and to
 * restore it when the given CPU goes back online at all times.
 *
 * Second, on many systems the initial EPB value coming from the platform
 * firmware is 0 ('performance') and at least on some of them that is because
 * the platform firmware does not initialize EPB at all with the assumption that
 * the OS will do that anyway.  That sometimes is problematic, as it may cause
 * the system battery to drain too fast, for example, so it is better to adjust
 * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
 * kernel changes it to the 'normal' value (6 by default; processor models
 * listed in intel_epb_normal[] use a model-specific value instead).
 */
55 | |
/*
 * EPB_MASK selects the low four bits of a value, which is where this file
 * reads and writes the EPB field of MSR_IA32_ENERGY_PERF_BIAS.
 * EPB_SAVED is OR-ed into every value stored in saved_epb so that a saved
 * EPB of 0 can still be told apart from "never saved".
 * MAX_EPB is the largest numeric value accepted through sysfs.
 */
#define EPB_MASK	0x0fULL
#define EPB_SAVED	0x10ULL
#define MAX_EPB		EPB_MASK

/* Per-CPU copy of the last saved EPB (plus EPB_SAVED); 0 means "not saved". */
static DEFINE_PER_CPU(u8, saved_epb);
61 | |
/*
 * Indices shared by energ_perf_values[] (numeric EPB settings) and
 * energy_perf_strings[] (their sysfs names).  The order must stay in sync
 * between the two tables.
 */
enum energy_perf_value_index {
	EPB_INDEX_PERFORMANCE = 0,
	EPB_INDEX_BALANCE_PERFORMANCE,
	EPB_INDEX_NORMAL,
	EPB_INDEX_BALANCE_POWERSAVE,
	EPB_INDEX_POWERSAVE,
};
69 | |
70 | static u8 energ_perf_values[] = { |
71 | [EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE, |
72 | [EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, |
73 | [EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL, |
74 | [EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE, |
75 | [EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE, |
76 | }; |
77 | |
78 | static int intel_epb_save(void) |
79 | { |
80 | u64 epb; |
81 | |
82 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
83 | /* |
84 | * Ensure that saved_epb will always be nonzero after this write even if |
85 | * the EPB value read from the MSR is 0. |
86 | */ |
87 | this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); |
88 | |
89 | return 0; |
90 | } |
91 | |
92 | static void intel_epb_restore(void) |
93 | { |
94 | u64 val = this_cpu_read(saved_epb); |
95 | u64 epb; |
96 | |
97 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
98 | if (val) { |
99 | val &= EPB_MASK; |
100 | } else { |
101 | /* |
102 | * Because intel_epb_save() has not run for the current CPU yet, |
103 | * it is going online for the first time, so if its EPB value is |
104 | * 0 ('performance') at this point, assume that it has not been |
105 | * initialized by the platform firmware and set it to 6 |
106 | * ('normal'). |
107 | */ |
108 | val = epb & EPB_MASK; |
109 | if (val == ENERGY_PERF_BIAS_PERFORMANCE) { |
110 | val = energ_perf_values[EPB_INDEX_NORMAL]; |
111 | pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n" ); |
112 | } |
113 | } |
114 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, val: (epb & ~EPB_MASK) | val); |
115 | } |
116 | |
117 | static struct syscore_ops intel_epb_syscore_ops = { |
118 | .suspend = intel_epb_save, |
119 | .resume = intel_epb_restore, |
120 | }; |
121 | |
122 | static const char * const energy_perf_strings[] = { |
123 | [EPB_INDEX_PERFORMANCE] = "performance" , |
124 | [EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance" , |
125 | [EPB_INDEX_NORMAL] = "normal" , |
126 | [EPB_INDEX_BALANCE_POWERSAVE] = "balance-power" , |
127 | [EPB_INDEX_POWERSAVE] = "power" , |
128 | }; |
129 | |
130 | static ssize_t energy_perf_bias_show(struct device *dev, |
131 | struct device_attribute *attr, |
132 | char *buf) |
133 | { |
134 | unsigned int cpu = dev->id; |
135 | u64 epb; |
136 | int ret; |
137 | |
138 | ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, q: &epb); |
139 | if (ret < 0) |
140 | return ret; |
141 | |
142 | return sprintf(buf, fmt: "%llu\n" , epb); |
143 | } |
144 | |
145 | static ssize_t energy_perf_bias_store(struct device *dev, |
146 | struct device_attribute *attr, |
147 | const char *buf, size_t count) |
148 | { |
149 | unsigned int cpu = dev->id; |
150 | u64 epb, val; |
151 | int ret; |
152 | |
153 | ret = __sysfs_match_string(array: energy_perf_strings, |
154 | ARRAY_SIZE(energy_perf_strings), s: buf); |
155 | if (ret >= 0) |
156 | val = energ_perf_values[ret]; |
157 | else if (kstrtou64(s: buf, base: 0, res: &val) || val > MAX_EPB) |
158 | return -EINVAL; |
159 | |
160 | ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, q: &epb); |
161 | if (ret < 0) |
162 | return ret; |
163 | |
164 | ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, |
165 | q: (epb & ~EPB_MASK) | val); |
166 | if (ret < 0) |
167 | return ret; |
168 | |
169 | return count; |
170 | } |
171 | |
172 | static DEVICE_ATTR_RW(energy_perf_bias); |
173 | |
174 | static struct attribute *intel_epb_attrs[] = { |
175 | &dev_attr_energy_perf_bias.attr, |
176 | NULL |
177 | }; |
178 | |
179 | static const struct attribute_group intel_epb_attr_group = { |
180 | .name = power_group_name, |
181 | .attrs = intel_epb_attrs |
182 | }; |
183 | |
184 | static int intel_epb_online(unsigned int cpu) |
185 | { |
186 | struct device *cpu_dev = get_cpu_device(cpu); |
187 | |
188 | intel_epb_restore(); |
189 | if (!cpuhp_tasks_frozen) |
190 | sysfs_merge_group(kobj: &cpu_dev->kobj, grp: &intel_epb_attr_group); |
191 | |
192 | return 0; |
193 | } |
194 | |
195 | static int intel_epb_offline(unsigned int cpu) |
196 | { |
197 | struct device *cpu_dev = get_cpu_device(cpu); |
198 | |
199 | if (!cpuhp_tasks_frozen) |
200 | sysfs_unmerge_group(kobj: &cpu_dev->kobj, grp: &intel_epb_attr_group); |
201 | |
202 | intel_epb_save(); |
203 | return 0; |
204 | } |
205 | |
206 | static const struct x86_cpu_id intel_epb_normal[] = { |
207 | X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, |
208 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), |
209 | X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, |
210 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), |
211 | X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, |
212 | ENERGY_PERF_BIAS_NORMAL_POWERSAVE), |
213 | {} |
214 | }; |
215 | |
216 | static __init int intel_epb_init(void) |
217 | { |
218 | const struct x86_cpu_id *id = x86_match_cpu(match: intel_epb_normal); |
219 | int ret; |
220 | |
221 | if (!boot_cpu_has(X86_FEATURE_EPB)) |
222 | return -ENODEV; |
223 | |
224 | if (id) |
225 | energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data; |
226 | |
227 | ret = cpuhp_setup_state(state: CPUHP_AP_X86_INTEL_EPB_ONLINE, |
228 | name: "x86/intel/epb:online" , startup: intel_epb_online, |
229 | teardown: intel_epb_offline); |
230 | if (ret < 0) |
231 | goto err_out_online; |
232 | |
233 | register_syscore_ops(ops: &intel_epb_syscore_ops); |
234 | return 0; |
235 | |
236 | err_out_online: |
237 | cpuhp_remove_state(state: CPUHP_AP_X86_INTEL_EPB_ONLINE); |
238 | return ret; |
239 | } |
240 | late_initcall(intel_epb_init); |
241 | |