energy_model.c source code [linux/kernel/power/energy_model.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Energy Model of devices
4	*
5	* Copyright (c) 2018-2021, Arm ltd.
6	* Written by: Quentin Perret, Arm ltd.
7	* Improvements provided by: Lukasz Luba, Arm ltd.
8	*/
9
10	#define pr_fmt(fmt) "energy_model: " fmt
11
12	#include <linux/cpu.h>
13	#include <linux/cpufreq.h>
14	#include <linux/cpumask.h>
15	#include <linux/debugfs.h>
16	#include <linux/energy_model.h>
17	#include <linux/sched/topology.h>
18	#include <linux/slab.h>
19
/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

/* Forward declarations of helpers that are defined further down in this file. */
static void em_cpufreq_update_efficiencies(struct device *dev,
					   struct em_perf_state *table);
static void em_check_capacity_update(void);
static void em_update_workfn(struct work_struct *work);
/* Delayed work item whose handler is em_update_workfn(). */
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);
31
32	static bool _is_cpu_device(struct device *dev)
33	{
34	return (dev->bus == &cpu_subsys);
35	}
36
37	#ifdef CONFIG_DEBUG_FS
38	static struct dentry *rootdir;
39
40	struct em_dbg_info {
41	struct em_perf_domain *pd;
42	int ps_id;
43	};
44
45	#define DEFINE_EM_DBG_SHOW(name, fname) \
46	static int em_debug_##fname##_show(struct seq_file s, void unused) \
47	{ \
48	struct em_dbg_info *em_dbg = s->private; \
49	struct em_perf_state *table; \
50	unsigned long val; \
51	\
52	rcu_read_lock(); \
53	table = em_perf_state_from_pd(em_dbg->pd); \
54	val = table[em_dbg->ps_id].name; \
55	rcu_read_unlock(); \
56	\
57	seq_printf(s, "%lu\n", val); \
58	return 0; \
59	} \
60	DEFINE_SHOW_ATTRIBUTE(em_debug_##fname)
61
62	DEFINE_EM_DBG_SHOW(frequency, frequency);
63	DEFINE_EM_DBG_SHOW(power, power);
64	DEFINE_EM_DBG_SHOW(cost, cost);
65	DEFINE_EM_DBG_SHOW(performance, performance);
66	DEFINE_EM_DBG_SHOW(flags, inefficiency);
67
68	static void em_debug_create_ps(struct em_perf_domain *em_pd,
69	struct em_dbg_info em_dbg, int* i,
70	struct dentry *pd)
71	{
72	struct em_perf_state *table;
73	unsigned long freq;
74	struct dentry *d;
75	char name[`24`];
76
77	em_dbg[i].pd = em_pd;
78	em_dbg[i].ps_id = i;
79
80	rcu_read_lock();
81	table = em_perf_state_from_pd(pd: em_pd);
82	freq = table[i].frequency;
83	rcu_read_unlock();
84
85	snprintf(buf: name, size: sizeof(name), fmt: "ps:%lu", freq);
86
87	/ Create per-ps directory /
88	d = debugfs_create_dir(name, parent: pd);
89	debugfs_create_file(name: "frequency", mode: `0444`, parent: d, data: &em_dbg[i],
90	fops: &em_debug_frequency_fops);
91	debugfs_create_file(name: "power", mode: `0444`, parent: d, data: &em_dbg[i],
92	fops: &em_debug_power_fops);
93	debugfs_create_file(name: "cost", mode: `0444`, parent: d, data: &em_dbg[i],
94	fops: &em_debug_cost_fops);
95	debugfs_create_file(name: "performance", mode: `0444`, parent: d, data: &em_dbg[i],
96	fops: &em_debug_performance_fops);
97	debugfs_create_file(name: "inefficient", mode: `0444`, parent: d, data: &em_dbg[i],
98	fops: &em_debug_inefficiency_fops);
99	}
100
101	static int em_debug_cpus_show(struct seq_file s, void* *unused)
102	{
103	seq_printf(m: s, fmt: "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
104
105	return `0`;
106	}
107	DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
108
109	static int em_debug_flags_show(struct seq_file s, void* *unused)
110	{
111	struct em_perf_domain *pd = s->private;
112
113	seq_printf(m: s, fmt: "%#lx\n", pd->flags);
114
115	return `0`;
116	}
117	DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
118
119	static void em_debug_create_pd(struct device *dev)
120	{
121	struct em_dbg_info *em_dbg;
122	struct dentry *d;
123	int i;
124
125	/ Create the directory of the performance domain /
126	d = debugfs_create_dir(name: dev_name(dev), parent: rootdir);
127
128	if (_is_cpu_device(dev))
129	debugfs_create_file(name: "cpus", mode: `0444`, parent: d, data: dev->em_pd->cpus,
130	fops: &em_debug_cpus_fops);
131
132	debugfs_create_file(name: "flags", mode: `0444`, parent: d, data: dev->em_pd,
133	fops: &em_debug_flags_fops);
134
135	em_dbg = devm_kcalloc(dev, n: dev->em_pd->nr_perf_states,
136	size: sizeof(*em_dbg), GFP_KERNEL);
137	if (!em_dbg)
138	return;
139
140	/ Create a sub-directory for each performance state /
141	for (i = `0`; i < dev->em_pd->nr_perf_states; i++)
142	em_debug_create_ps(em_pd: dev->em_pd, em_dbg, i, pd: d);
143
144	}
145
146	static void em_debug_remove_pd(struct device *dev)
147	{
148	debugfs_lookup_and_remove(name: dev_name(dev), parent: rootdir);
149	}
150
151	static int __init em_debug_init(void)
152	{
153	/ Create /sys/kernel/debug/energy_model directory /
154	rootdir = debugfs_create_dir(name: "energy_model", NULL);
155
156	return `0`;
157	}
158	fs_initcall(em_debug_init);
159	#else /* CONFIG_DEBUG_FS */
160	static void em_debug_create_pd(struct device *dev) {}
161	static void em_debug_remove_pd(struct device *dev) {}
162	#endif
163
164	static void em_destroy_table_rcu(struct rcu_head *rp)
165	{
166	struct em_perf_table __rcu *table;
167
168	table = container_of(rp, struct em_perf_table, rcu);
169	kfree(objp: table);
170	}
171
172	static void em_release_table_kref(struct kref *kref)
173	{
174	struct em_perf_table __rcu *table;
175
176	/ It was the last owner of this table so we can free /
177	table = container_of(kref, struct em_perf_table, kref);
178
179	call_rcu(head: &table->rcu, func: em_destroy_table_rcu);
180	}
181
182	/**
183	* em_table_free() - Handles safe free of the EM table when needed
184	* @table : EM table which is going to be freed
185	*
186	* No return values.
187	*/
188	void em_table_free(struct em_perf_table __rcu *table)
189	{
190	kref_put(kref: &table->kref, release: em_release_table_kref);
191	}
192
193	/**
194	* em_table_alloc() - Allocate a new EM table
195	* @pd : EM performance domain for which this must be done
196	*
197	* Allocate a new EM table and initialize its kref to indicate that it
198	* has a user.
199	* Returns allocated table or NULL.
200	*/
201	struct em_perf_table __rcu em_table_alloc(struct* em_perf_domain *pd)
202	{
203	struct em_perf_table __rcu *table;
204	int table_size;
205
206	table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
207
208	table = kzalloc(size: sizeof(*table) + table_size, GFP_KERNEL);
209	if (!table)
210	return NULL;
211
212	kref_init(kref: &table->kref);
213
214	return table;
215	}
216
217	static void em_init_performance(struct device dev, struct* em_perf_domain *pd,
218	struct em_perf_state table, int* nr_states)
219	{
220	u64 fmax, max_cap;
221	int i, cpu;
222
223	/ This is needed only for CPUs and EAS skip other devices /
224	if (!_is_cpu_device(dev))
225	return;
226
227	cpu = cpumask_first(em_span_cpus(pd));
228
229	/*
230	* Calculate the performance value for each frequency with
231	* linear relationship. The final CPU capacity might not be ready at
232	* boot time, but the EM will be updated a bit later with correct one.
233	*/
234	fmax = (u64) table[nr_states - `1`].frequency;
235	max_cap = (u64) arch_scale_cpu_capacity(cpu);
236	for (i = `0`; i < nr_states; i++)
237	table[i].performance = div64_u64(dividend: max_cap * table[i].frequency,
238	divisor: fmax);
239	}
240
241	static int em_compute_costs(struct device dev, struct* em_perf_state *table,
242	struct em_data_callback cb, int* nr_states,
243	unsigned long flags)
244	{
245	unsigned long prev_cost = ULONG_MAX;
246	int i, ret;
247
248	/ Compute the cost of each performance state. /
249	for (i = nr_states - `1`; i >= `0`; i--) {
250	unsigned long power_res, cost;
251
252	if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) {
253	ret = cb->get_cost(dev, table[i].frequency, &cost);
254	if (ret \|\| !cost \|\| cost > EM_MAX_POWER) {
255	dev_err(dev, "EM: invalid cost %lu %d\n",
256	cost, ret);
257	return -EINVAL;
258	}
259	} else {
260	/ increase resolution of 'cost' precision /
261	power_res = table[i].power * `10`;
262	cost = power_res / table[i].performance;
263	}
264
265	table[i].cost = cost;
266
267	if (table[i].cost >= prev_cost) {
268	table[i].flags = EM_PERF_STATE_INEFFICIENT;
269	dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
270	table[i].frequency);
271	} else {
272	prev_cost = table[i].cost;
273	}
274	}
275
276	return `0`;
277	}
278
279	/**
280	* em_dev_compute_costs() - Calculate cost values for new runtime EM table
281	* @dev : Device for which the EM table is to be updated
282	* @table : The new EM table that is going to get the costs calculated
283	* @nr_states : Number of performance states
284	*
285	* Calculate the em_perf_state::cost values for new runtime EM table. The
286	* values are used for EAS during task placement. It also calculates and sets
287	* the efficiency flag for each performance state. When the function finish
288	* successfully the EM table is ready to be updated and used by EAS.
289	*
290	* Return 0 on success or a proper error in case of failure.
291	*/
292	int em_dev_compute_costs(struct device dev, struct* em_perf_state *table,
293	int nr_states)
294	{
295	return em_compute_costs(dev, table, NULL, nr_states, flags: `0`);
296	}
297
298	/**
299	* em_dev_update_perf_domain() - Update runtime EM table for a device
300	* @dev : Device for which the EM is to be updated
301	* @new_table : The new EM table that is going to be used from now
302	*
303	* Update EM runtime modifiable table for the @dev using the provided @table.
304	*
305	* This function uses a mutex to serialize writers, so it must not be called
306	* from a non-sleeping context.
307	*
308	* Return 0 on success or an error code on failure.
309	*/
310	int em_dev_update_perf_domain(struct device *dev,
311	struct em_perf_table __rcu *new_table)
312	{
313	struct em_perf_table __rcu *old_table;
314	struct em_perf_domain *pd;
315
316	if (!dev)
317	return -EINVAL;
318
319	/ Serialize update/unregister or concurrent updates /
320	mutex_lock(&em_pd_mutex);
321
322	if (!dev->em_pd) {
323	mutex_unlock(lock: &em_pd_mutex);
324	return -EINVAL;
325	}
326	pd = dev->em_pd;
327
328	kref_get(kref: &new_table->kref);
329
330	old_table = pd->em_table;
331	rcu_assign_pointer(pd->em_table, new_table);
332
333	em_cpufreq_update_efficiencies(dev, table: new_table->state);
334
335	em_table_free(table: old_table);
336
337	mutex_unlock(lock: &em_pd_mutex);
338	return `0`;
339	}
340	EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
341
342	static int em_create_perf_table(struct device dev, struct* em_perf_domain *pd,
343	struct em_perf_state *table,
344	struct em_data_callback *cb,
345	unsigned long flags)
346	{
347	unsigned long power, freq, prev_freq = `0`;
348	int nr_states = pd->nr_perf_states;
349	int i, ret;
350
351	/ Build the list of performance states for this performance domain /
352	for (i = `0`, freq = `0`; i < nr_states; i++, freq++) {
353	/*
354	* active_power() is a driver callback which ceils 'freq' to
355	* lowest performance state of 'dev' above 'freq' and updates
356	* 'power' and 'freq' accordingly.
357	*/
358	ret = cb->active_power(dev, &power, &freq);
359	if (ret) {
360	dev_err(dev, "EM: invalid perf. state: %d\n",
361	ret);
362	return -EINVAL;
363	}
364
365	/*
366	* We expect the driver callback to increase the frequency for
367	* higher performance states.
368	*/
369	if (freq <= prev_freq) {
370	dev_err(dev, "EM: non-increasing freq: %lu\n",
371	freq);
372	return -EINVAL;
373	}
374
375	/*
376	* The power returned by active_state() is expected to be
377	* positive and be in range.
378	*/
379	if (!power \|\| power > EM_MAX_POWER) {
380	dev_err(dev, "EM: invalid power: %lu\n",
381	power);
382	return -EINVAL;
383	}
384
385	table[i].power = power;
386	table[i].frequency = prev_freq = freq;
387	}
388
389	em_init_performance(dev, pd, table, nr_states);
390
391	ret = em_compute_costs(dev, table, cb, nr_states, flags);
392	if (ret)
393	return -EINVAL;
394
395	return `0`;
396	}
397
398	static int em_create_pd(struct device dev, int* nr_states,
399	struct em_data_callback cb, cpumask_t cpus,
400	unsigned long flags)
401	{
402	struct em_perf_table __rcu *em_table;
403	struct em_perf_domain *pd;
404	struct device *cpu_dev;
405	int cpu, ret, num_cpus;
406
407	if (_is_cpu_device(dev)) {
408	num_cpus = cpumask_weight(srcp: cpus);
409
410	/ Prevent max possible energy calculation to not overflow /
411	if (num_cpus > EM_MAX_NUM_CPUS) {
412	dev_err(dev, "EM: too many CPUs, overflow possible\n");
413	return -EINVAL;
414	}
415
416	pd = kzalloc(size: sizeof(*pd) + cpumask_size(), GFP_KERNEL);
417	if (!pd)
418	return -ENOMEM;
419
420	cpumask_copy(em_span_cpus(pd), srcp: cpus);
421	} else {
422	pd = kzalloc(size: sizeof(*pd), GFP_KERNEL);
423	if (!pd)
424	return -ENOMEM;
425	}
426
427	pd->nr_perf_states = nr_states;
428
429	em_table = em_table_alloc(pd);
430	if (!em_table)
431	goto free_pd;
432
433	ret = em_create_perf_table(dev, pd, table: em_table->state, cb, flags);
434	if (ret)
435	goto free_pd_table;
436
437	rcu_assign_pointer(pd->em_table, em_table);
438
439	if (_is_cpu_device(dev))
440	for_each_cpu(cpu, cpus) {
441	cpu_dev = get_cpu_device(cpu);
442	cpu_dev->em_pd = pd;
443	}
444
445	dev->em_pd = pd;
446
447	return `0`;
448
449	free_pd_table:
450	kfree(objp: em_table);
451	free_pd:
452	kfree(objp: pd);
453	return -EINVAL;
454	}
455
456	static void
457	em_cpufreq_update_efficiencies(struct device dev, struct* em_perf_state *table)
458	{
459	struct em_perf_domain *pd = dev->em_pd;
460	struct cpufreq_policy *policy;
461	int found = `0`;
462	int i, cpu;
463
464	if (!_is_cpu_device(dev))
465	return;
466
467	/ Try to get a CPU which is active and in this PD /
468	cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask);
469	if (cpu >= nr_cpu_ids) {
470	dev_warn(dev, "EM: No online CPU for CPUFreq policy\n");
471	return;
472	}
473
474	policy = cpufreq_cpu_get(cpu);
475	if (!policy) {
476	dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
477	return;
478	}
479
480	for (i = `0`; i < pd->nr_perf_states; i++) {
481	if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
482	continue;
483
484	if (!cpufreq_table_set_inefficient(policy, frequency: table[i].frequency))
485	found++;
486	}
487
488	cpufreq_cpu_put(policy);
489
490	if (!found)
491	return;
492
493	/*
494	* Efficiencies have been installed in CPUFreq, inefficient frequencies
495	* will be skipped. The EM can do the same.
496	*/
497	pd->flags \|= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
498	}
499
500	/**
501	* em_pd_get() - Return the performance domain for a device
502	* @dev : Device to find the performance domain for
503	*
504	* Returns the performance domain to which @dev belongs, or NULL if it doesn't
505	* exist.
506	*/
507	struct em_perf_domain em_pd_get(struct* device *dev)
508	{
509	if (IS_ERR_OR_NULL(ptr: dev))
510	return NULL;
511
512	return dev->em_pd;
513	}
514	EXPORT_SYMBOL_GPL(em_pd_get);
515
516	/**
517	* em_cpu_get() - Return the performance domain for a CPU
518	* @cpu : CPU to find the performance domain for
519	*
520	* Returns the performance domain to which @cpu belongs, or NULL if it doesn't
521	* exist.
522	*/
523	struct em_perf_domain em_cpu_get(int* cpu)
524	{
525	struct device *cpu_dev;
526
527	cpu_dev = get_cpu_device(cpu);
528	if (!cpu_dev)
529	return NULL;
530
531	return em_pd_get(cpu_dev);
532	}
533	EXPORT_SYMBOL_GPL(em_cpu_get);
534
535	/**
536	* em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
537	* @dev : Device for which the EM is to register
538	* @nr_states : Number of performance states to register
539	* @cb : Callback functions providing the data of the Energy Model
540	* @cpus : Pointer to cpumask_t, which in case of a CPU device is
541	* obligatory. It can be taken from i.e. 'policy->cpus'. For other
542	* type of devices this should be set to NULL.
543	* @microwatts : Flag indicating that the power values are in micro-Watts or
544	* in some other scale. It must be set properly.
545	*
546	* Create Energy Model tables for a performance domain using the callbacks
547	* defined in cb.
548	*
549	* The @microwatts is important to set with correct value. Some kernel
550	* sub-systems might rely on this flag and check if all devices in the EM are
551	* using the same scale.
552	*
553	* If multiple clients register the same performance domain, all but the first
554	* registration will be ignored.
555	*
556	* Return 0 on success
557	*/
558	int em_dev_register_perf_domain(struct device dev, unsigned* int nr_states,
559	struct em_data_callback cb, cpumask_t cpus,
560	bool microwatts)
561	{
562	unsigned long cap, prev_cap = `0`;
563	unsigned long flags = `0`;
564	int cpu, ret;
565
566	if (!dev \|\| !nr_states \|\| !cb)
567	return -EINVAL;
568
569	/*
570	* Use a mutex to serialize the registration of performance domains and
571	* let the driver-defined callback functions sleep.
572	*/
573	mutex_lock(&em_pd_mutex);
574
575	if (dev->em_pd) {
576	ret = -EEXIST;
577	goto unlock;
578	}
579
580	if (_is_cpu_device(dev)) {
581	if (!cpus) {
582	dev_err(dev, "EM: invalid CPU mask\n");
583	ret = -EINVAL;
584	goto unlock;
585	}
586
587	for_each_cpu(cpu, cpus) {
588	if (em_cpu_get(cpu)) {
589	dev_err(dev, "EM: exists for CPU%d\n", cpu);
590	ret = -EEXIST;
591	goto unlock;
592	}
593	/*
594	* All CPUs of a domain must have the same
595	* micro-architecture since they all share the same
596	* table.
597	*/
598	cap = arch_scale_cpu_capacity(cpu);
599	if (prev_cap && prev_cap != cap) {
600	dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
601	cpumask_pr_args(cpus));
602
603	ret = -EINVAL;
604	goto unlock;
605	}
606	prev_cap = cap;
607	}
608	}
609
610	if (microwatts)
611	flags \|= EM_PERF_DOMAIN_MICROWATTS;
612	else if (cb->get_cost)
613	flags \|= EM_PERF_DOMAIN_ARTIFICIAL;
614
615	/*
616	* EM only supports uW (exception is artificial EM).
617	* Therefore, check and force the drivers to provide
618	* power in uW.
619	*/
620	if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) {
621	dev_err(dev, "EM: only supports uW power values\n");
622	ret = -EINVAL;
623	goto unlock;
624	}
625
626	ret = em_create_pd(dev, nr_states, cb, cpus, flags);
627	if (ret)
628	goto unlock;
629
630	dev->em_pd->flags \|= flags;
631
632	em_cpufreq_update_efficiencies(dev, table: dev->em_pd->em_table->state);
633
634	em_debug_create_pd(dev);
635	dev_info(dev, "EM: created perf domain\n");
636
637	unlock:
638	mutex_unlock(lock: &em_pd_mutex);
639
640	if (_is_cpu_device(dev))
641	em_check_capacity_update();
642
643	return ret;
644	}
645	EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
646
647	/**
648	* em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
649	* @dev : Device for which the EM is registered
650	*
651	* Unregister the EM for the specified @dev (but not a CPU device).
652	*/
653	void em_dev_unregister_perf_domain(struct device *dev)
654	{
655	if (IS_ERR_OR_NULL(ptr: dev) \|\| !dev->em_pd)
656	return;
657
658	if (_is_cpu_device(dev))
659	return;
660
661	/*
662	* The mutex separates all register/unregister requests and protects
663	* from potential clean-up/setup issues in the debugfs directories.
664	* The debugfs directory name is the same as device's name.
665	*/
666	mutex_lock(&em_pd_mutex);
667	em_debug_remove_pd(dev);
668
669	em_table_free(table: dev->em_pd->em_table);
670
671	kfree(objp: dev->em_pd);
672	dev->em_pd = NULL;
673	mutex_unlock(lock: &em_pd_mutex);
674	}
675	EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
676
677	/*
678	* Adjustment of CPU performance values after boot, when all CPUs capacites
679	* are correctly calculated.
680	*/
681	static void em_adjust_new_capacity(struct device *dev,
682	struct em_perf_domain *pd,
683	u64 max_cap)
684	{
685	struct em_perf_table __rcu *em_table;
686	struct em_perf_state ps, new_ps;
687	int ret, ps_size;
688
689	em_table = em_table_alloc(pd);
690	if (!em_table) {
691	dev_warn(dev, "EM: allocation failed\n");
692	return;
693	}
694
695	new_ps = em_table->state;
696
697	rcu_read_lock();
698	ps = em_perf_state_from_pd(pd);
699	/ Initialize data based on old table /
700	ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
701	memcpy(new_ps, ps, ps_size);
702
703	rcu_read_unlock();
704
705	em_init_performance(dev, pd, table: new_ps, nr_states: pd->nr_perf_states);
706	ret = em_compute_costs(dev, table: new_ps, NULL, nr_states: pd->nr_perf_states,
707	flags: pd->flags);
708	if (ret) {
709	dev_warn(dev, "EM: compute costs failed\n");
710	return;
711	}
712
713	ret = em_dev_update_perf_domain(dev, em_table);
714	if (ret)
715	dev_warn(dev, "EM: update failed %d\n", ret);
716
717	/*
718	* This is one-time-update, so give up the ownership in this updater.
719	* The EM framework has incremented the usage counter and from now
720	* will keep the reference (then free the memory when needed).
721	*/
722	em_table_free(table: em_table);
723	}
724
725	static void em_check_capacity_update(void)
726	{
727	cpumask_var_t cpu_done_mask;
728	struct em_perf_state *table;
729	struct em_perf_domain *pd;
730	unsigned long cpu_capacity;
731	int cpu;
732
733	if (!zalloc_cpumask_var(mask: &cpu_done_mask, GFP_KERNEL)) {
734	pr_warn("no free memory\n");
735	return;
736	}
737
738	/ Check if CPUs capacity has changed than update EM /
739	for_each_possible_cpu(cpu) {
740	struct cpufreq_policy *policy;
741	unsigned long em_max_perf;
742	struct device *dev;
743
744	if (cpumask_test_cpu(cpu, cpumask: cpu_done_mask))
745	continue;
746
747	policy = cpufreq_cpu_get(cpu);
748	if (!policy) {
749	pr_debug("Accessing cpu%d policy failed\n", cpu);
750	schedule_delayed_work(dwork: &em_update_work,
751	delay: msecs_to_jiffies(m: `1000`));
752	break;
753	}
754	cpufreq_cpu_put(policy);
755
756	pd = em_cpu_get(cpu);
757	if (!pd \|\| em_is_artificial(pd))
758	continue;
759
760	cpumask_or(dstp: cpu_done_mask, src1p: cpu_done_mask,
761	em_span_cpus(pd));
762
763	cpu_capacity = arch_scale_cpu_capacity(cpu);
764
765	rcu_read_lock();
766	table = em_perf_state_from_pd(pd);
767	em_max_perf = table[pd->nr_perf_states - `1`].performance;
768	rcu_read_unlock();
769
770	/*
771	* Check if the CPU capacity has been adjusted during boot
772	* and trigger the update for new performance values.
773	*/
774	if (em_max_perf == cpu_capacity)
775	continue;
776
777	pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
778	cpu, cpu_capacity, em_max_perf);
779
780	dev = get_cpu_device(cpu);
781	em_adjust_new_capacity(dev, pd, max_cap: cpu_capacity);
782	}
783
784	free_cpumask_var(mask: cpu_done_mask);
785	}
786
/* Handler of em_update_work: run the CPU capacity check again. */
static void em_update_workfn(struct work_struct *work)
{
	em_check_capacity_update();
}
791

source code of linux/kernel/power/energy_model.c