1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * drivers/cpufreq/cpufreq_ondemand.c |
4 | * |
5 | * Copyright (C) 2001 Russell King |
6 | * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. |
7 | * Jun Nakajima <jun.nakajima@intel.com> |
8 | */ |
9 | |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | |
#include <linux/cpu.h>
#include <linux/math64.h>
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/sched/cpufreq.h>

#include "cpufreq_ondemand.h"
19 | |
/* On-demand governor macros */
/* up_threshold chosen by od_init() when get_cpu_idle_time_us() returns -1ULL */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
/* Initial sampling_down_factor set by od_init() */
#define DEF_SAMPLING_DOWN_FACTOR (1)
/* Largest value accepted by sampling_down_factor_store() */
#define MAX_SAMPLING_DOWN_FACTOR (100000)
/* up_threshold chosen by od_init() when idle micro accounting is supported */
#define MICRO_FREQUENCY_UP_THRESHOLD (95)
/* Inclusive bounds enforced by up_threshold_store() */
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

/* Forward declaration; initialised further down with the powersave hook. */
static struct od_ops od_ops;

/*
 * powersave_bias given to newly initialised tuners in od_init(); updated by
 * od_set_powersave_bias().
 */
static unsigned int default_powersave_bias;
31 | |
/*
 * Decide whether time spent waiting on I/O is counted as busy by default.
 *
 * Whether that is desirable depends on how efficiently the CPU idles at a
 * higher frequency/voltage.  Pavel Machek reports it is not the case for
 * various generations of AMD and old Intel systems, and Mike Chan
 * (android.com) reports the same for ARM.  Hence only specific known CPU
 * series are opted in here; everything else is left to the user.
 *
 * Returns 1 when I/O wait should be treated as busy, 0 otherwise.
 */
static int should_io_be_busy(void)
{
#ifdef CONFIG_X86
	/* Intel family 6, model 15 (Core 2) and later have an efficient idle. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model >= 15)
		return 1;
#endif
	return 0;
}
54 | |
55 | /* |
56 | * Find right freq to be set now with powersave_bias on. |
57 | * Returns the freq_hi to be used right now and will set freq_hi_delay_us, |
58 | * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. |
59 | */ |
60 | static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, |
61 | unsigned int freq_next, unsigned int relation) |
62 | { |
63 | unsigned int freq_req, freq_reduc, freq_avg; |
64 | unsigned int freq_hi, freq_lo; |
65 | unsigned int index; |
66 | unsigned int delay_hi_us; |
67 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
68 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
69 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
70 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
71 | struct cpufreq_frequency_table *freq_table = policy->freq_table; |
72 | |
73 | if (!freq_table) { |
74 | dbs_info->freq_lo = 0; |
75 | dbs_info->freq_lo_delay_us = 0; |
76 | return freq_next; |
77 | } |
78 | |
79 | index = cpufreq_frequency_table_target(policy, target_freq: freq_next, relation); |
80 | freq_req = freq_table[index].frequency; |
81 | freq_reduc = freq_req * od_tuners->powersave_bias / 1000; |
82 | freq_avg = freq_req - freq_reduc; |
83 | |
84 | /* Find freq bounds for freq_avg in freq_table */ |
85 | index = cpufreq_table_find_index_h(policy, target_freq: freq_avg, |
86 | efficiencies: relation & CPUFREQ_RELATION_E); |
87 | freq_lo = freq_table[index].frequency; |
88 | index = cpufreq_table_find_index_l(policy, target_freq: freq_avg, |
89 | efficiencies: relation & CPUFREQ_RELATION_E); |
90 | freq_hi = freq_table[index].frequency; |
91 | |
92 | /* Find out how long we have to be in hi and lo freqs */ |
93 | if (freq_hi == freq_lo) { |
94 | dbs_info->freq_lo = 0; |
95 | dbs_info->freq_lo_delay_us = 0; |
96 | return freq_lo; |
97 | } |
98 | delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; |
99 | delay_hi_us += (freq_hi - freq_lo) / 2; |
100 | delay_hi_us /= freq_hi - freq_lo; |
101 | dbs_info->freq_hi_delay_us = delay_hi_us; |
102 | dbs_info->freq_lo = freq_lo; |
103 | dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; |
104 | return freq_hi; |
105 | } |
106 | |
107 | static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) |
108 | { |
109 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs: policy->governor_data); |
110 | |
111 | dbs_info->freq_lo = 0; |
112 | } |
113 | |
114 | static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) |
115 | { |
116 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
117 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
118 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
119 | |
120 | if (od_tuners->powersave_bias) |
121 | freq = od_ops.powersave_bias_target(policy, freq, |
122 | CPUFREQ_RELATION_HE); |
123 | else if (policy->cur == policy->max) |
124 | return; |
125 | |
126 | __cpufreq_driver_target(policy, target_freq: freq, relation: od_tuners->powersave_bias ? |
127 | CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE); |
128 | } |
129 | |
130 | /* |
131 | * Every sampling_rate, we check, if current idle time is less than 20% |
132 | * (default), then we try to increase frequency. Else, we adjust the frequency |
133 | * proportional to load. |
134 | */ |
135 | static void od_update(struct cpufreq_policy *policy) |
136 | { |
137 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
138 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
139 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
140 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
141 | unsigned int load = dbs_update(policy); |
142 | |
143 | dbs_info->freq_lo = 0; |
144 | |
145 | /* Check for frequency increase */ |
146 | if (load > dbs_data->up_threshold) { |
147 | /* If switching to max speed, apply sampling_down_factor */ |
148 | if (policy->cur < policy->max) |
149 | policy_dbs->rate_mult = dbs_data->sampling_down_factor; |
150 | dbs_freq_increase(policy, freq: policy->max); |
151 | } else { |
152 | /* Calculate the next frequency proportional to load */ |
153 | unsigned int freq_next, min_f, max_f; |
154 | |
155 | min_f = policy->cpuinfo.min_freq; |
156 | max_f = policy->cpuinfo.max_freq; |
157 | freq_next = min_f + load * (max_f - min_f) / 100; |
158 | |
159 | /* No longer fully busy, reset rate_mult */ |
160 | policy_dbs->rate_mult = 1; |
161 | |
162 | if (od_tuners->powersave_bias) |
163 | freq_next = od_ops.powersave_bias_target(policy, |
164 | freq_next, |
165 | CPUFREQ_RELATION_LE); |
166 | |
167 | __cpufreq_driver_target(policy, target_freq: freq_next, CPUFREQ_RELATION_CE); |
168 | } |
169 | } |
170 | |
171 | static unsigned int od_dbs_update(struct cpufreq_policy *policy) |
172 | { |
173 | struct policy_dbs_info *policy_dbs = policy->governor_data; |
174 | struct dbs_data *dbs_data = policy_dbs->dbs_data; |
175 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); |
176 | int sample_type = dbs_info->sample_type; |
177 | |
178 | /* Common NORMAL_SAMPLE setup */ |
179 | dbs_info->sample_type = OD_NORMAL_SAMPLE; |
180 | /* |
181 | * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore |
182 | * it then. |
183 | */ |
184 | if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { |
185 | __cpufreq_driver_target(policy, target_freq: dbs_info->freq_lo, |
186 | CPUFREQ_RELATION_HE); |
187 | return dbs_info->freq_lo_delay_us; |
188 | } |
189 | |
190 | od_update(policy); |
191 | |
192 | if (dbs_info->freq_lo) { |
193 | /* Setup SUB_SAMPLE */ |
194 | dbs_info->sample_type = OD_SUB_SAMPLE; |
195 | return dbs_info->freq_hi_delay_us; |
196 | } |
197 | |
198 | return dbs_data->sampling_rate * policy_dbs->rate_mult; |
199 | } |
200 | |
/************************** sysfs interface ************************/
/* Forward declaration; the governor object is initialised further down. */
static struct dbs_governor od_dbs_gov;
203 | |
204 | static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf, |
205 | size_t count) |
206 | { |
207 | struct dbs_data *dbs_data = to_dbs_data(attr_set); |
208 | unsigned int input; |
209 | int ret; |
210 | |
211 | ret = sscanf(buf, "%u" , &input); |
212 | if (ret != 1) |
213 | return -EINVAL; |
214 | dbs_data->io_is_busy = !!input; |
215 | |
216 | /* we need to re-evaluate prev_cpu_idle */ |
217 | gov_update_cpu_data(dbs_data); |
218 | |
219 | return count; |
220 | } |
221 | |
222 | static ssize_t up_threshold_store(struct gov_attr_set *attr_set, |
223 | const char *buf, size_t count) |
224 | { |
225 | struct dbs_data *dbs_data = to_dbs_data(attr_set); |
226 | unsigned int input; |
227 | int ret; |
228 | ret = sscanf(buf, "%u" , &input); |
229 | |
230 | if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || |
231 | input < MIN_FREQUENCY_UP_THRESHOLD) { |
232 | return -EINVAL; |
233 | } |
234 | |
235 | dbs_data->up_threshold = input; |
236 | return count; |
237 | } |
238 | |
239 | static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, |
240 | const char *buf, size_t count) |
241 | { |
242 | struct dbs_data *dbs_data = to_dbs_data(attr_set); |
243 | struct policy_dbs_info *policy_dbs; |
244 | unsigned int input; |
245 | int ret; |
246 | ret = sscanf(buf, "%u" , &input); |
247 | |
248 | if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) |
249 | return -EINVAL; |
250 | |
251 | dbs_data->sampling_down_factor = input; |
252 | |
253 | /* Reset down sampling multiplier in case it was active */ |
254 | list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { |
255 | /* |
256 | * Doing this without locking might lead to using different |
257 | * rate_mult values in od_update() and od_dbs_update(). |
258 | */ |
259 | mutex_lock(&policy_dbs->update_mutex); |
260 | policy_dbs->rate_mult = 1; |
261 | mutex_unlock(lock: &policy_dbs->update_mutex); |
262 | } |
263 | |
264 | return count; |
265 | } |
266 | |
267 | static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, |
268 | const char *buf, size_t count) |
269 | { |
270 | struct dbs_data *dbs_data = to_dbs_data(attr_set); |
271 | unsigned int input; |
272 | int ret; |
273 | |
274 | ret = sscanf(buf, "%u" , &input); |
275 | if (ret != 1) |
276 | return -EINVAL; |
277 | |
278 | if (input > 1) |
279 | input = 1; |
280 | |
281 | if (input == dbs_data->ignore_nice_load) { /* nothing to do */ |
282 | return count; |
283 | } |
284 | dbs_data->ignore_nice_load = input; |
285 | |
286 | /* we need to re-evaluate prev_cpu_idle */ |
287 | gov_update_cpu_data(dbs_data); |
288 | |
289 | return count; |
290 | } |
291 | |
292 | static ssize_t powersave_bias_store(struct gov_attr_set *attr_set, |
293 | const char *buf, size_t count) |
294 | { |
295 | struct dbs_data *dbs_data = to_dbs_data(attr_set); |
296 | struct od_dbs_tuners *od_tuners = dbs_data->tuners; |
297 | struct policy_dbs_info *policy_dbs; |
298 | unsigned int input; |
299 | int ret; |
300 | ret = sscanf(buf, "%u" , &input); |
301 | |
302 | if (ret != 1) |
303 | return -EINVAL; |
304 | |
305 | if (input > 1000) |
306 | input = 1000; |
307 | |
308 | od_tuners->powersave_bias = input; |
309 | |
310 | list_for_each_entry(policy_dbs, &attr_set->policy_list, list) |
311 | ondemand_powersave_bias_init(policy: policy_dbs->policy); |
312 | |
313 | return count; |
314 | } |
315 | |
/*
 * Instantiate the sysfs attributes through helper macros defined outside
 * this file.
 * NOTE(review): presumably gov_show_one_common()/gov_show_one() generate the
 * <name>_show() readers and gov_attr_rw() ties each to its <name>_store()
 * handler -- confirm against the governor header.  No sampling_rate_store()
 * is defined in this file, so it has to come from elsewhere.
 */
gov_show_one_common(sampling_rate);
gov_show_one_common(up_threshold);
gov_show_one_common(sampling_down_factor);
gov_show_one_common(ignore_nice_load);
gov_show_one_common(io_is_busy);
gov_show_one(od, powersave_bias);

/* Each invocation provides the <name> object whose .attr is listed in od_attrs[]. */
gov_attr_rw(sampling_rate);
gov_attr_rw(io_is_busy);
gov_attr_rw(up_threshold);
gov_attr_rw(sampling_down_factor);
gov_attr_rw(ignore_nice_load);
gov_attr_rw(powersave_bias);
329 | |
/* NULL-terminated list of the ondemand governor's tunable attributes. */
static struct attribute *od_attrs[] = {
	&sampling_rate.attr,
	&up_threshold.attr,
	&sampling_down_factor.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	&io_is_busy.attr,
	NULL
};
/* NOTE(review): presumably defines the od_groups used by od_dbs_gov.kobj_type. */
ATTRIBUTE_GROUPS(od);
340 | |
341 | /************************** sysfs end ************************/ |
342 | |
343 | static struct policy_dbs_info *od_alloc(void) |
344 | { |
345 | struct od_policy_dbs_info *dbs_info; |
346 | |
347 | dbs_info = kzalloc(size: sizeof(*dbs_info), GFP_KERNEL); |
348 | return dbs_info ? &dbs_info->policy_dbs : NULL; |
349 | } |
350 | |
/* Release the od_policy_dbs_info that contains @policy_dbs (see od_alloc()). */
static void od_free(struct policy_dbs_info *policy_dbs)
{
	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);

	kfree(dbs_info);
}
355 | |
356 | static int od_init(struct dbs_data *dbs_data) |
357 | { |
358 | struct od_dbs_tuners *tuners; |
359 | u64 idle_time; |
360 | int cpu; |
361 | |
362 | tuners = kzalloc(size: sizeof(*tuners), GFP_KERNEL); |
363 | if (!tuners) |
364 | return -ENOMEM; |
365 | |
366 | cpu = get_cpu(); |
367 | idle_time = get_cpu_idle_time_us(cpu, NULL); |
368 | put_cpu(); |
369 | if (idle_time != -1ULL) { |
370 | /* Idle micro accounting is supported. Use finer thresholds */ |
371 | dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; |
372 | } else { |
373 | dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; |
374 | } |
375 | |
376 | dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; |
377 | dbs_data->ignore_nice_load = 0; |
378 | tuners->powersave_bias = default_powersave_bias; |
379 | dbs_data->io_is_busy = should_io_be_busy(); |
380 | |
381 | dbs_data->tuners = tuners; |
382 | return 0; |
383 | } |
384 | |
385 | static void od_exit(struct dbs_data *dbs_data) |
386 | { |
387 | kfree(objp: dbs_data->tuners); |
388 | } |
389 | |
390 | static void od_start(struct cpufreq_policy *policy) |
391 | { |
392 | struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs: policy->governor_data); |
393 | |
394 | dbs_info->sample_type = OD_NORMAL_SAMPLE; |
395 | ondemand_powersave_bias_init(policy); |
396 | } |
397 | |
/*
 * Hook used whenever powersave_bias is non-zero.  Defaults to the generic
 * implementation; replaced by od_register_powersave_bias_handler() and
 * restored by od_unregister_powersave_bias_handler().
 */
static struct od_ops od_ops = {
	.powersave_bias_target = generic_powersave_bias_target,
};

/* The "ondemand" governor: wires this file's callbacks and sysfs groups. */
static struct dbs_governor od_dbs_gov = {
	.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
	.kobj_type = { .default_groups = od_groups },
	.gov_dbs_update = od_dbs_update,
	.alloc = od_alloc,
	.free = od_free,
	.init = od_init,
	.exit = od_exit,
	.start = od_start,
};

/* Shorthand for the cpufreq_governor embedded in od_dbs_gov. */
#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov)
414 | |
415 | static void od_set_powersave_bias(unsigned int powersave_bias) |
416 | { |
417 | unsigned int cpu; |
418 | cpumask_var_t done; |
419 | |
420 | if (!alloc_cpumask_var(mask: &done, GFP_KERNEL)) |
421 | return; |
422 | |
423 | default_powersave_bias = powersave_bias; |
424 | cpumask_clear(dstp: done); |
425 | |
426 | cpus_read_lock(); |
427 | for_each_online_cpu(cpu) { |
428 | struct cpufreq_policy *policy; |
429 | struct policy_dbs_info *policy_dbs; |
430 | struct dbs_data *dbs_data; |
431 | struct od_dbs_tuners *od_tuners; |
432 | |
433 | if (cpumask_test_cpu(cpu, cpumask: done)) |
434 | continue; |
435 | |
436 | policy = cpufreq_cpu_get_raw(cpu); |
437 | if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) |
438 | continue; |
439 | |
440 | policy_dbs = policy->governor_data; |
441 | if (!policy_dbs) |
442 | continue; |
443 | |
444 | cpumask_or(dstp: done, src1p: done, src2p: policy->cpus); |
445 | |
446 | dbs_data = policy_dbs->dbs_data; |
447 | od_tuners = dbs_data->tuners; |
448 | od_tuners->powersave_bias = default_powersave_bias; |
449 | } |
450 | cpus_read_unlock(); |
451 | |
452 | free_cpumask_var(mask: done); |
453 | } |
454 | |
455 | void od_register_powersave_bias_handler(unsigned int (*f) |
456 | (struct cpufreq_policy *, unsigned int, unsigned int), |
457 | unsigned int powersave_bias) |
458 | { |
459 | od_ops.powersave_bias_target = f; |
460 | od_set_powersave_bias(powersave_bias); |
461 | } |
462 | EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler); |
463 | |
464 | void od_unregister_powersave_bias_handler(void) |
465 | { |
466 | od_ops.powersave_bias_target = generic_powersave_bias_target; |
467 | od_set_powersave_bias(powersave_bias: 0); |
468 | } |
469 | EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); |
470 | |
/* Module metadata. */
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
476 | |
#if defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND)
/* Built only when ondemand is configured as the default governor. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	struct cpufreq_governor *gov = &CPU_FREQ_GOV_ONDEMAND;

	return gov;
}
#endif
483 | |
/*
 * NOTE(review): these macros are defined outside this file; presumably they
 * register/unregister the governor at module init/exit -- confirm.
 */
cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND);
cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND);
486 | |