1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Energy Model of devices |
4 | * |
5 | * Copyright (c) 2018-2021, Arm ltd. |
6 | * Written by: Quentin Perret, Arm ltd. |
7 | * Improvements provided by: Lukasz Luba, Arm ltd. |
8 | */ |
9 | |
10 | #define pr_fmt(fmt) "energy_model: " fmt |
11 | |
12 | #include <linux/cpu.h> |
13 | #include <linux/cpufreq.h> |
14 | #include <linux/cpumask.h> |
15 | #include <linux/debugfs.h> |
16 | #include <linux/energy_model.h> |
17 | #include <linux/sched/topology.h> |
18 | #include <linux/slab.h> |
19 | |
/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep. It is also taken by the runtime table
 * update and by the unregister path in this file.
 */
static DEFINE_MUTEX(em_pd_mutex);

/* Forward declarations of helpers that are defined further down. */
static void em_cpufreq_update_efficiencies(struct device *dev,
					   struct em_perf_state *table);
static void em_check_capacity_update(void);
static void em_update_workfn(struct work_struct *work);
/* Delayed work item whose handler is em_update_workfn(). */
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);
31 | |
32 | static bool _is_cpu_device(struct device *dev) |
33 | { |
34 | return (dev->bus == &cpu_subsys); |
35 | } |
36 | |
37 | #ifdef CONFIG_DEBUG_FS |
/* Top-level debugfs directory under which every performance domain lives. */
static struct dentry *rootdir;

/*
 * Private data attached to each per-performance-state debugfs file: it
 * identifies one entry of one performance domain's table.
 */
struct em_dbg_info {
	struct em_perf_domain *pd;	/* performance domain owning the state */
	int ps_id;			/* index of the state in the EM table */
};
44 | |
45 | #define DEFINE_EM_DBG_SHOW(name, fname) \ |
46 | static int em_debug_##fname##_show(struct seq_file *s, void *unused) \ |
47 | { \ |
48 | struct em_dbg_info *em_dbg = s->private; \ |
49 | struct em_perf_state *table; \ |
50 | unsigned long val; \ |
51 | \ |
52 | rcu_read_lock(); \ |
53 | table = em_perf_state_from_pd(em_dbg->pd); \ |
54 | val = table[em_dbg->ps_id].name; \ |
55 | rcu_read_unlock(); \ |
56 | \ |
57 | seq_printf(s, "%lu\n", val); \ |
58 | return 0; \ |
59 | } \ |
60 | DEFINE_SHOW_ATTRIBUTE(em_debug_##fname) |
61 | |
62 | DEFINE_EM_DBG_SHOW(frequency, frequency); |
63 | DEFINE_EM_DBG_SHOW(power, power); |
64 | DEFINE_EM_DBG_SHOW(cost, cost); |
65 | DEFINE_EM_DBG_SHOW(performance, performance); |
66 | DEFINE_EM_DBG_SHOW(flags, inefficiency); |
67 | |
68 | static void em_debug_create_ps(struct em_perf_domain *em_pd, |
69 | struct em_dbg_info *em_dbg, int i, |
70 | struct dentry *pd) |
71 | { |
72 | struct em_perf_state *table; |
73 | unsigned long freq; |
74 | struct dentry *d; |
75 | char name[24]; |
76 | |
77 | em_dbg[i].pd = em_pd; |
78 | em_dbg[i].ps_id = i; |
79 | |
80 | rcu_read_lock(); |
81 | table = em_perf_state_from_pd(pd: em_pd); |
82 | freq = table[i].frequency; |
83 | rcu_read_unlock(); |
84 | |
85 | snprintf(buf: name, size: sizeof(name), fmt: "ps:%lu" , freq); |
86 | |
87 | /* Create per-ps directory */ |
88 | d = debugfs_create_dir(name, parent: pd); |
89 | debugfs_create_file(name: "frequency" , mode: 0444, parent: d, data: &em_dbg[i], |
90 | fops: &em_debug_frequency_fops); |
91 | debugfs_create_file(name: "power" , mode: 0444, parent: d, data: &em_dbg[i], |
92 | fops: &em_debug_power_fops); |
93 | debugfs_create_file(name: "cost" , mode: 0444, parent: d, data: &em_dbg[i], |
94 | fops: &em_debug_cost_fops); |
95 | debugfs_create_file(name: "performance" , mode: 0444, parent: d, data: &em_dbg[i], |
96 | fops: &em_debug_performance_fops); |
97 | debugfs_create_file(name: "inefficient" , mode: 0444, parent: d, data: &em_dbg[i], |
98 | fops: &em_debug_inefficiency_fops); |
99 | } |
100 | |
101 | static int em_debug_cpus_show(struct seq_file *s, void *unused) |
102 | { |
103 | seq_printf(m: s, fmt: "%*pbl\n" , cpumask_pr_args(to_cpumask(s->private))); |
104 | |
105 | return 0; |
106 | } |
107 | DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); |
108 | |
109 | static int em_debug_flags_show(struct seq_file *s, void *unused) |
110 | { |
111 | struct em_perf_domain *pd = s->private; |
112 | |
113 | seq_printf(m: s, fmt: "%#lx\n" , pd->flags); |
114 | |
115 | return 0; |
116 | } |
117 | DEFINE_SHOW_ATTRIBUTE(em_debug_flags); |
118 | |
119 | static void em_debug_create_pd(struct device *dev) |
120 | { |
121 | struct em_dbg_info *em_dbg; |
122 | struct dentry *d; |
123 | int i; |
124 | |
125 | /* Create the directory of the performance domain */ |
126 | d = debugfs_create_dir(name: dev_name(dev), parent: rootdir); |
127 | |
128 | if (_is_cpu_device(dev)) |
129 | debugfs_create_file(name: "cpus" , mode: 0444, parent: d, data: dev->em_pd->cpus, |
130 | fops: &em_debug_cpus_fops); |
131 | |
132 | debugfs_create_file(name: "flags" , mode: 0444, parent: d, data: dev->em_pd, |
133 | fops: &em_debug_flags_fops); |
134 | |
135 | em_dbg = devm_kcalloc(dev, n: dev->em_pd->nr_perf_states, |
136 | size: sizeof(*em_dbg), GFP_KERNEL); |
137 | if (!em_dbg) |
138 | return; |
139 | |
140 | /* Create a sub-directory for each performance state */ |
141 | for (i = 0; i < dev->em_pd->nr_perf_states; i++) |
142 | em_debug_create_ps(em_pd: dev->em_pd, em_dbg, i, pd: d); |
143 | |
144 | } |
145 | |
146 | static void em_debug_remove_pd(struct device *dev) |
147 | { |
148 | debugfs_lookup_and_remove(name: dev_name(dev), parent: rootdir); |
149 | } |
150 | |
151 | static int __init em_debug_init(void) |
152 | { |
153 | /* Create /sys/kernel/debug/energy_model directory */ |
154 | rootdir = debugfs_create_dir(name: "energy_model" , NULL); |
155 | |
156 | return 0; |
157 | } |
158 | fs_initcall(em_debug_init); |
159 | #else /* CONFIG_DEBUG_FS */ |
/* Without CONFIG_DEBUG_FS the debugfs helpers are empty stubs. */
static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
162 | #endif |
163 | |
164 | static void em_destroy_table_rcu(struct rcu_head *rp) |
165 | { |
166 | struct em_perf_table __rcu *table; |
167 | |
168 | table = container_of(rp, struct em_perf_table, rcu); |
169 | kfree(objp: table); |
170 | } |
171 | |
172 | static void em_release_table_kref(struct kref *kref) |
173 | { |
174 | struct em_perf_table __rcu *table; |
175 | |
176 | /* It was the last owner of this table so we can free */ |
177 | table = container_of(kref, struct em_perf_table, kref); |
178 | |
179 | call_rcu(head: &table->rcu, func: em_destroy_table_rcu); |
180 | } |
181 | |
182 | /** |
183 | * em_table_free() - Handles safe free of the EM table when needed |
184 | * @table : EM table which is going to be freed |
185 | * |
186 | * No return values. |
187 | */ |
188 | void em_table_free(struct em_perf_table __rcu *table) |
189 | { |
190 | kref_put(kref: &table->kref, release: em_release_table_kref); |
191 | } |
192 | |
193 | /** |
194 | * em_table_alloc() - Allocate a new EM table |
195 | * @pd : EM performance domain for which this must be done |
196 | * |
197 | * Allocate a new EM table and initialize its kref to indicate that it |
198 | * has a user. |
199 | * Returns allocated table or NULL. |
200 | */ |
201 | struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) |
202 | { |
203 | struct em_perf_table __rcu *table; |
204 | int table_size; |
205 | |
206 | table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; |
207 | |
208 | table = kzalloc(size: sizeof(*table) + table_size, GFP_KERNEL); |
209 | if (!table) |
210 | return NULL; |
211 | |
212 | kref_init(kref: &table->kref); |
213 | |
214 | return table; |
215 | } |
216 | |
217 | static void em_init_performance(struct device *dev, struct em_perf_domain *pd, |
218 | struct em_perf_state *table, int nr_states) |
219 | { |
220 | u64 fmax, max_cap; |
221 | int i, cpu; |
222 | |
223 | /* This is needed only for CPUs and EAS skip other devices */ |
224 | if (!_is_cpu_device(dev)) |
225 | return; |
226 | |
227 | cpu = cpumask_first(em_span_cpus(pd)); |
228 | |
229 | /* |
230 | * Calculate the performance value for each frequency with |
231 | * linear relationship. The final CPU capacity might not be ready at |
232 | * boot time, but the EM will be updated a bit later with correct one. |
233 | */ |
234 | fmax = (u64) table[nr_states - 1].frequency; |
235 | max_cap = (u64) arch_scale_cpu_capacity(cpu); |
236 | for (i = 0; i < nr_states; i++) |
237 | table[i].performance = div64_u64(dividend: max_cap * table[i].frequency, |
238 | divisor: fmax); |
239 | } |
240 | |
241 | static int em_compute_costs(struct device *dev, struct em_perf_state *table, |
242 | struct em_data_callback *cb, int nr_states, |
243 | unsigned long flags) |
244 | { |
245 | unsigned long prev_cost = ULONG_MAX; |
246 | int i, ret; |
247 | |
248 | /* Compute the cost of each performance state. */ |
249 | for (i = nr_states - 1; i >= 0; i--) { |
250 | unsigned long power_res, cost; |
251 | |
252 | if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) { |
253 | ret = cb->get_cost(dev, table[i].frequency, &cost); |
254 | if (ret || !cost || cost > EM_MAX_POWER) { |
255 | dev_err(dev, "EM: invalid cost %lu %d\n" , |
256 | cost, ret); |
257 | return -EINVAL; |
258 | } |
259 | } else { |
260 | /* increase resolution of 'cost' precision */ |
261 | power_res = table[i].power * 10; |
262 | cost = power_res / table[i].performance; |
263 | } |
264 | |
265 | table[i].cost = cost; |
266 | |
267 | if (table[i].cost >= prev_cost) { |
268 | table[i].flags = EM_PERF_STATE_INEFFICIENT; |
269 | dev_dbg(dev, "EM: OPP:%lu is inefficient\n" , |
270 | table[i].frequency); |
271 | } else { |
272 | prev_cost = table[i].cost; |
273 | } |
274 | } |
275 | |
276 | return 0; |
277 | } |
278 | |
279 | /** |
280 | * em_dev_compute_costs() - Calculate cost values for new runtime EM table |
281 | * @dev : Device for which the EM table is to be updated |
282 | * @table : The new EM table that is going to get the costs calculated |
283 | * @nr_states : Number of performance states |
284 | * |
285 | * Calculate the em_perf_state::cost values for new runtime EM table. The |
286 | * values are used for EAS during task placement. It also calculates and sets |
287 | * the efficiency flag for each performance state. When the function finish |
288 | * successfully the EM table is ready to be updated and used by EAS. |
289 | * |
290 | * Return 0 on success or a proper error in case of failure. |
291 | */ |
292 | int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, |
293 | int nr_states) |
294 | { |
295 | return em_compute_costs(dev, table, NULL, nr_states, flags: 0); |
296 | } |
297 | |
298 | /** |
299 | * em_dev_update_perf_domain() - Update runtime EM table for a device |
300 | * @dev : Device for which the EM is to be updated |
301 | * @new_table : The new EM table that is going to be used from now |
302 | * |
303 | * Update EM runtime modifiable table for the @dev using the provided @table. |
304 | * |
305 | * This function uses a mutex to serialize writers, so it must not be called |
306 | * from a non-sleeping context. |
307 | * |
308 | * Return 0 on success or an error code on failure. |
309 | */ |
310 | int em_dev_update_perf_domain(struct device *dev, |
311 | struct em_perf_table __rcu *new_table) |
312 | { |
313 | struct em_perf_table __rcu *old_table; |
314 | struct em_perf_domain *pd; |
315 | |
316 | if (!dev) |
317 | return -EINVAL; |
318 | |
319 | /* Serialize update/unregister or concurrent updates */ |
320 | mutex_lock(&em_pd_mutex); |
321 | |
322 | if (!dev->em_pd) { |
323 | mutex_unlock(lock: &em_pd_mutex); |
324 | return -EINVAL; |
325 | } |
326 | pd = dev->em_pd; |
327 | |
328 | kref_get(kref: &new_table->kref); |
329 | |
330 | old_table = pd->em_table; |
331 | rcu_assign_pointer(pd->em_table, new_table); |
332 | |
333 | em_cpufreq_update_efficiencies(dev, table: new_table->state); |
334 | |
335 | em_table_free(table: old_table); |
336 | |
337 | mutex_unlock(lock: &em_pd_mutex); |
338 | return 0; |
339 | } |
340 | EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); |
341 | |
342 | static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, |
343 | struct em_perf_state *table, |
344 | struct em_data_callback *cb, |
345 | unsigned long flags) |
346 | { |
347 | unsigned long power, freq, prev_freq = 0; |
348 | int nr_states = pd->nr_perf_states; |
349 | int i, ret; |
350 | |
351 | /* Build the list of performance states for this performance domain */ |
352 | for (i = 0, freq = 0; i < nr_states; i++, freq++) { |
353 | /* |
354 | * active_power() is a driver callback which ceils 'freq' to |
355 | * lowest performance state of 'dev' above 'freq' and updates |
356 | * 'power' and 'freq' accordingly. |
357 | */ |
358 | ret = cb->active_power(dev, &power, &freq); |
359 | if (ret) { |
360 | dev_err(dev, "EM: invalid perf. state: %d\n" , |
361 | ret); |
362 | return -EINVAL; |
363 | } |
364 | |
365 | /* |
366 | * We expect the driver callback to increase the frequency for |
367 | * higher performance states. |
368 | */ |
369 | if (freq <= prev_freq) { |
370 | dev_err(dev, "EM: non-increasing freq: %lu\n" , |
371 | freq); |
372 | return -EINVAL; |
373 | } |
374 | |
375 | /* |
376 | * The power returned by active_state() is expected to be |
377 | * positive and be in range. |
378 | */ |
379 | if (!power || power > EM_MAX_POWER) { |
380 | dev_err(dev, "EM: invalid power: %lu\n" , |
381 | power); |
382 | return -EINVAL; |
383 | } |
384 | |
385 | table[i].power = power; |
386 | table[i].frequency = prev_freq = freq; |
387 | } |
388 | |
389 | em_init_performance(dev, pd, table, nr_states); |
390 | |
391 | ret = em_compute_costs(dev, table, cb, nr_states, flags); |
392 | if (ret) |
393 | return -EINVAL; |
394 | |
395 | return 0; |
396 | } |
397 | |
398 | static int em_create_pd(struct device *dev, int nr_states, |
399 | struct em_data_callback *cb, cpumask_t *cpus, |
400 | unsigned long flags) |
401 | { |
402 | struct em_perf_table __rcu *em_table; |
403 | struct em_perf_domain *pd; |
404 | struct device *cpu_dev; |
405 | int cpu, ret, num_cpus; |
406 | |
407 | if (_is_cpu_device(dev)) { |
408 | num_cpus = cpumask_weight(srcp: cpus); |
409 | |
410 | /* Prevent max possible energy calculation to not overflow */ |
411 | if (num_cpus > EM_MAX_NUM_CPUS) { |
412 | dev_err(dev, "EM: too many CPUs, overflow possible\n" ); |
413 | return -EINVAL; |
414 | } |
415 | |
416 | pd = kzalloc(size: sizeof(*pd) + cpumask_size(), GFP_KERNEL); |
417 | if (!pd) |
418 | return -ENOMEM; |
419 | |
420 | cpumask_copy(em_span_cpus(pd), srcp: cpus); |
421 | } else { |
422 | pd = kzalloc(size: sizeof(*pd), GFP_KERNEL); |
423 | if (!pd) |
424 | return -ENOMEM; |
425 | } |
426 | |
427 | pd->nr_perf_states = nr_states; |
428 | |
429 | em_table = em_table_alloc(pd); |
430 | if (!em_table) |
431 | goto free_pd; |
432 | |
433 | ret = em_create_perf_table(dev, pd, table: em_table->state, cb, flags); |
434 | if (ret) |
435 | goto free_pd_table; |
436 | |
437 | rcu_assign_pointer(pd->em_table, em_table); |
438 | |
439 | if (_is_cpu_device(dev)) |
440 | for_each_cpu(cpu, cpus) { |
441 | cpu_dev = get_cpu_device(cpu); |
442 | cpu_dev->em_pd = pd; |
443 | } |
444 | |
445 | dev->em_pd = pd; |
446 | |
447 | return 0; |
448 | |
449 | free_pd_table: |
450 | kfree(objp: em_table); |
451 | free_pd: |
452 | kfree(objp: pd); |
453 | return -EINVAL; |
454 | } |
455 | |
456 | static void |
457 | em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) |
458 | { |
459 | struct em_perf_domain *pd = dev->em_pd; |
460 | struct cpufreq_policy *policy; |
461 | int found = 0; |
462 | int i, cpu; |
463 | |
464 | if (!_is_cpu_device(dev)) |
465 | return; |
466 | |
467 | /* Try to get a CPU which is active and in this PD */ |
468 | cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask); |
469 | if (cpu >= nr_cpu_ids) { |
470 | dev_warn(dev, "EM: No online CPU for CPUFreq policy\n" ); |
471 | return; |
472 | } |
473 | |
474 | policy = cpufreq_cpu_get(cpu); |
475 | if (!policy) { |
476 | dev_warn(dev, "EM: Access to CPUFreq policy failed\n" ); |
477 | return; |
478 | } |
479 | |
480 | for (i = 0; i < pd->nr_perf_states; i++) { |
481 | if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) |
482 | continue; |
483 | |
484 | if (!cpufreq_table_set_inefficient(policy, frequency: table[i].frequency)) |
485 | found++; |
486 | } |
487 | |
488 | cpufreq_cpu_put(policy); |
489 | |
490 | if (!found) |
491 | return; |
492 | |
493 | /* |
494 | * Efficiencies have been installed in CPUFreq, inefficient frequencies |
495 | * will be skipped. The EM can do the same. |
496 | */ |
497 | pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES; |
498 | } |
499 | |
500 | /** |
501 | * em_pd_get() - Return the performance domain for a device |
502 | * @dev : Device to find the performance domain for |
503 | * |
504 | * Returns the performance domain to which @dev belongs, or NULL if it doesn't |
505 | * exist. |
506 | */ |
507 | struct em_perf_domain *em_pd_get(struct device *dev) |
508 | { |
509 | if (IS_ERR_OR_NULL(ptr: dev)) |
510 | return NULL; |
511 | |
512 | return dev->em_pd; |
513 | } |
514 | EXPORT_SYMBOL_GPL(em_pd_get); |
515 | |
516 | /** |
517 | * em_cpu_get() - Return the performance domain for a CPU |
518 | * @cpu : CPU to find the performance domain for |
519 | * |
520 | * Returns the performance domain to which @cpu belongs, or NULL if it doesn't |
521 | * exist. |
522 | */ |
523 | struct em_perf_domain *em_cpu_get(int cpu) |
524 | { |
525 | struct device *cpu_dev; |
526 | |
527 | cpu_dev = get_cpu_device(cpu); |
528 | if (!cpu_dev) |
529 | return NULL; |
530 | |
531 | return em_pd_get(cpu_dev); |
532 | } |
533 | EXPORT_SYMBOL_GPL(em_cpu_get); |
534 | |
535 | /** |
536 | * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device |
537 | * @dev : Device for which the EM is to register |
538 | * @nr_states : Number of performance states to register |
539 | * @cb : Callback functions providing the data of the Energy Model |
540 | * @cpus : Pointer to cpumask_t, which in case of a CPU device is |
541 | * obligatory. It can be taken from i.e. 'policy->cpus'. For other |
542 | * type of devices this should be set to NULL. |
543 | * @microwatts : Flag indicating that the power values are in micro-Watts or |
544 | * in some other scale. It must be set properly. |
545 | * |
546 | * Create Energy Model tables for a performance domain using the callbacks |
547 | * defined in cb. |
548 | * |
549 | * The @microwatts is important to set with correct value. Some kernel |
550 | * sub-systems might rely on this flag and check if all devices in the EM are |
551 | * using the same scale. |
552 | * |
553 | * If multiple clients register the same performance domain, all but the first |
554 | * registration will be ignored. |
555 | * |
556 | * Return 0 on success |
557 | */ |
558 | int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, |
559 | struct em_data_callback *cb, cpumask_t *cpus, |
560 | bool microwatts) |
561 | { |
562 | unsigned long cap, prev_cap = 0; |
563 | unsigned long flags = 0; |
564 | int cpu, ret; |
565 | |
566 | if (!dev || !nr_states || !cb) |
567 | return -EINVAL; |
568 | |
569 | /* |
570 | * Use a mutex to serialize the registration of performance domains and |
571 | * let the driver-defined callback functions sleep. |
572 | */ |
573 | mutex_lock(&em_pd_mutex); |
574 | |
575 | if (dev->em_pd) { |
576 | ret = -EEXIST; |
577 | goto unlock; |
578 | } |
579 | |
580 | if (_is_cpu_device(dev)) { |
581 | if (!cpus) { |
582 | dev_err(dev, "EM: invalid CPU mask\n" ); |
583 | ret = -EINVAL; |
584 | goto unlock; |
585 | } |
586 | |
587 | for_each_cpu(cpu, cpus) { |
588 | if (em_cpu_get(cpu)) { |
589 | dev_err(dev, "EM: exists for CPU%d\n" , cpu); |
590 | ret = -EEXIST; |
591 | goto unlock; |
592 | } |
593 | /* |
594 | * All CPUs of a domain must have the same |
595 | * micro-architecture since they all share the same |
596 | * table. |
597 | */ |
598 | cap = arch_scale_cpu_capacity(cpu); |
599 | if (prev_cap && prev_cap != cap) { |
600 | dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n" , |
601 | cpumask_pr_args(cpus)); |
602 | |
603 | ret = -EINVAL; |
604 | goto unlock; |
605 | } |
606 | prev_cap = cap; |
607 | } |
608 | } |
609 | |
610 | if (microwatts) |
611 | flags |= EM_PERF_DOMAIN_MICROWATTS; |
612 | else if (cb->get_cost) |
613 | flags |= EM_PERF_DOMAIN_ARTIFICIAL; |
614 | |
615 | /* |
616 | * EM only supports uW (exception is artificial EM). |
617 | * Therefore, check and force the drivers to provide |
618 | * power in uW. |
619 | */ |
620 | if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) { |
621 | dev_err(dev, "EM: only supports uW power values\n" ); |
622 | ret = -EINVAL; |
623 | goto unlock; |
624 | } |
625 | |
626 | ret = em_create_pd(dev, nr_states, cb, cpus, flags); |
627 | if (ret) |
628 | goto unlock; |
629 | |
630 | dev->em_pd->flags |= flags; |
631 | |
632 | em_cpufreq_update_efficiencies(dev, table: dev->em_pd->em_table->state); |
633 | |
634 | em_debug_create_pd(dev); |
635 | dev_info(dev, "EM: created perf domain\n" ); |
636 | |
637 | unlock: |
638 | mutex_unlock(lock: &em_pd_mutex); |
639 | |
640 | if (_is_cpu_device(dev)) |
641 | em_check_capacity_update(); |
642 | |
643 | return ret; |
644 | } |
645 | EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); |
646 | |
647 | /** |
648 | * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device |
649 | * @dev : Device for which the EM is registered |
650 | * |
651 | * Unregister the EM for the specified @dev (but not a CPU device). |
652 | */ |
653 | void em_dev_unregister_perf_domain(struct device *dev) |
654 | { |
655 | if (IS_ERR_OR_NULL(ptr: dev) || !dev->em_pd) |
656 | return; |
657 | |
658 | if (_is_cpu_device(dev)) |
659 | return; |
660 | |
661 | /* |
662 | * The mutex separates all register/unregister requests and protects |
663 | * from potential clean-up/setup issues in the debugfs directories. |
664 | * The debugfs directory name is the same as device's name. |
665 | */ |
666 | mutex_lock(&em_pd_mutex); |
667 | em_debug_remove_pd(dev); |
668 | |
669 | em_table_free(table: dev->em_pd->em_table); |
670 | |
671 | kfree(objp: dev->em_pd); |
672 | dev->em_pd = NULL; |
673 | mutex_unlock(lock: &em_pd_mutex); |
674 | } |
675 | EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); |
676 | |
677 | /* |
678 | * Adjustment of CPU performance values after boot, when all CPUs capacites |
679 | * are correctly calculated. |
680 | */ |
681 | static void em_adjust_new_capacity(struct device *dev, |
682 | struct em_perf_domain *pd, |
683 | u64 max_cap) |
684 | { |
685 | struct em_perf_table __rcu *em_table; |
686 | struct em_perf_state *ps, *new_ps; |
687 | int ret, ps_size; |
688 | |
689 | em_table = em_table_alloc(pd); |
690 | if (!em_table) { |
691 | dev_warn(dev, "EM: allocation failed\n" ); |
692 | return; |
693 | } |
694 | |
695 | new_ps = em_table->state; |
696 | |
697 | rcu_read_lock(); |
698 | ps = em_perf_state_from_pd(pd); |
699 | /* Initialize data based on old table */ |
700 | ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states; |
701 | memcpy(new_ps, ps, ps_size); |
702 | |
703 | rcu_read_unlock(); |
704 | |
705 | em_init_performance(dev, pd, table: new_ps, nr_states: pd->nr_perf_states); |
706 | ret = em_compute_costs(dev, table: new_ps, NULL, nr_states: pd->nr_perf_states, |
707 | flags: pd->flags); |
708 | if (ret) { |
709 | dev_warn(dev, "EM: compute costs failed\n" ); |
710 | return; |
711 | } |
712 | |
713 | ret = em_dev_update_perf_domain(dev, em_table); |
714 | if (ret) |
715 | dev_warn(dev, "EM: update failed %d\n" , ret); |
716 | |
717 | /* |
718 | * This is one-time-update, so give up the ownership in this updater. |
719 | * The EM framework has incremented the usage counter and from now |
720 | * will keep the reference (then free the memory when needed). |
721 | */ |
722 | em_table_free(table: em_table); |
723 | } |
724 | |
725 | static void em_check_capacity_update(void) |
726 | { |
727 | cpumask_var_t cpu_done_mask; |
728 | struct em_perf_state *table; |
729 | struct em_perf_domain *pd; |
730 | unsigned long cpu_capacity; |
731 | int cpu; |
732 | |
733 | if (!zalloc_cpumask_var(mask: &cpu_done_mask, GFP_KERNEL)) { |
734 | pr_warn("no free memory\n" ); |
735 | return; |
736 | } |
737 | |
738 | /* Check if CPUs capacity has changed than update EM */ |
739 | for_each_possible_cpu(cpu) { |
740 | struct cpufreq_policy *policy; |
741 | unsigned long em_max_perf; |
742 | struct device *dev; |
743 | |
744 | if (cpumask_test_cpu(cpu, cpumask: cpu_done_mask)) |
745 | continue; |
746 | |
747 | policy = cpufreq_cpu_get(cpu); |
748 | if (!policy) { |
749 | pr_debug("Accessing cpu%d policy failed\n" , cpu); |
750 | schedule_delayed_work(dwork: &em_update_work, |
751 | delay: msecs_to_jiffies(m: 1000)); |
752 | break; |
753 | } |
754 | cpufreq_cpu_put(policy); |
755 | |
756 | pd = em_cpu_get(cpu); |
757 | if (!pd || em_is_artificial(pd)) |
758 | continue; |
759 | |
760 | cpumask_or(dstp: cpu_done_mask, src1p: cpu_done_mask, |
761 | em_span_cpus(pd)); |
762 | |
763 | cpu_capacity = arch_scale_cpu_capacity(cpu); |
764 | |
765 | rcu_read_lock(); |
766 | table = em_perf_state_from_pd(pd); |
767 | em_max_perf = table[pd->nr_perf_states - 1].performance; |
768 | rcu_read_unlock(); |
769 | |
770 | /* |
771 | * Check if the CPU capacity has been adjusted during boot |
772 | * and trigger the update for new performance values. |
773 | */ |
774 | if (em_max_perf == cpu_capacity) |
775 | continue; |
776 | |
777 | pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n" , |
778 | cpu, cpu_capacity, em_max_perf); |
779 | |
780 | dev = get_cpu_device(cpu); |
781 | em_adjust_new_capacity(dev, pd, max_cap: cpu_capacity); |
782 | } |
783 | |
784 | free_cpumask_var(mask: cpu_done_mask); |
785 | } |
786 | |
/* Handler of em_update_work: run the capacity check again. */
static void em_update_workfn(struct work_struct *work)
{
	em_check_capacity_update();
}
791 | |