// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm/common/mcpm_entry.c -- entry point for multi-cluster PM
 *
 * Created by: Nicolas Pitre, March 2012
 * Copyright:  (C) 2012-2013 Linaro Limited
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/cpu_pm.h>

#include <asm/mcpm.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/cputype.h>
#include <asm/suspend.h>

/*
 * The public API for this code is documented in arch/arm/include/asm/mcpm.h.
 * For a comprehensive description of the main algorithm used here, please
 * see Documentation/arch/arm/cluster-pm-race-avoidance.rst.
 */

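/*
 * Shared CPU/cluster state machine used by the race avoidance algorithm
 * referenced above.  Each cluster entry carries a per-CPU state, a cluster
 * state and an "inbound" state.  All accesses go through sync_cache_r()/
 * sync_cache_w() because some of the participating CPUs may be running
 * with their data cache disabled.
 */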
struct sync_struct mcpm_sync;

/*
 * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
 * This must be called at the point of committing to teardown of a CPU.
 * The CPU cache (SCTLR.C bit) is expected to still be active.
 */
static void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
{
	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
}

/*
 * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
 * cluster can be torn down without disrupting this CPU.
 * To avoid deadlocks, this must be called before a CPU is powered down.
 * The CPU cache (SCTLR.C bit) is expected to be off.
 * However L2 cache might or might not be active.
 */
static void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
{
	dmb();
	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
	sev();
}

/*
 * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
 * @state: the final state of the cluster:
 *     CLUSTER_UP: no destructive teardown was done and the cluster has been
 *         restored to the previous state (CPU cache still active); or
 *     CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
 *         (CPU cache disabled, L2 cache either enabled or disabled).
 */
static void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
{
	dmb();
	mcpm_sync.clusters[cluster].cluster = state;
	sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
	sev();
}

/*
 * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
 * This function should be called by the last man, after local CPU teardown
 * is complete.  CPU cache expected to be active.
 *
 * Returns:
 *     false: the critical section was not entered because an inbound CPU was
 *         observed, or the cluster is already being set up;
 *     true: the critical section was entered: it is now safe to tear down the
 *         cluster.
 */
static bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
{
	unsigned int i;
	struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];

	/* Warn inbound CPUs that the cluster is being torn down: */
	c->cluster = CLUSTER_GOING_DOWN;
	sync_cache_w(&c->cluster);

	/* Back out if the inbound cluster is already in the critical region: */
	sync_cache_r(&c->inbound);
	if (c->inbound == INBOUND_COMING_UP)
		goto abort;

	/*
	 * Wait for all CPUs to get out of the GOING_DOWN state, so that local
	 * teardown is complete on each CPU before tearing down the cluster.
	 *
	 * If any CPU has been woken up again from the DOWN state, then we
	 * shouldn't be taking the cluster down at all: abort in that case.
	 */
	sync_cache_r(&c->cpus);
	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
		int cpustate;

		if (i == cpu)
			continue;

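		/*
		 * WFE parks this CPU until an event is sent, e.g. by the
		 * sev() in __mcpm_cpu_down() once the other CPU has recorded
		 * its final state; re-read that state from memory each time
		 * around the loop.
		 */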
		while (1) {
			cpustate = c->cpus[i].cpu;
			if (cpustate != CPU_GOING_DOWN)
				break;

			wfe();
			sync_cache_r(&c->cpus[i].cpu);
		}

		switch (cpustate) {
		case CPU_DOWN:
			continue;

		default:
			goto abort;
		}
	}

	return true;

abort:
	__mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
	return false;
}

static int __mcpm_cluster_state(unsigned int cluster)
{
	sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
	return mcpm_sync.clusters[cluster].cluster;
}

extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];

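/*
 * Record the physical address the given CPU should jump to once it comes
 * back up through mcpm_entry_point; a NULL ptr clears the vector.  The
 * value is pushed out to memory so a CPU entering with its cache still
 * off can see it.
 */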
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
{
	unsigned long val = ptr ? __pa_symbol(ptr) : 0;
	mcpm_entry_vectors[cluster][cpu] = val;
	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}

extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];

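/*
 * Record an "early poke": a (physical address, value) pair that the
 * low-level entry code is expected to write on the given CPU's behalf
 * early during wake-up, before this CPU can run C code itself.
 */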
void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
			 unsigned long poke_phys_addr, unsigned long poke_val)
{
	unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
	poke[0] = poke_phys_addr;
	poke[1] = poke_val;
	__sync_cache_range_w(poke, 2 * sizeof(*poke));
}

static const struct mcpm_platform_ops *platform_ops;

int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
{
	if (platform_ops)
		return -EBUSY;
	platform_ops = ops;
	return 0;
}

bool mcpm_is_available(void)
{
	return (platform_ops) ? true : false;
}
EXPORT_SYMBOL_GPL(mcpm_is_available);
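
/*
 * Rough usage sketch (the my_* backend names are hypothetical, not from
 * this file): a platform's PM driver fills in a struct mcpm_platform_ops
 * with its power-control callbacks and registers it once during early
 * init, e.g.:
 *
 *	static const struct mcpm_platform_ops my_pm_ops = {
 *		.cpu_powerup			= my_cpu_powerup,
 *		.cluster_powerup		= my_cluster_powerup,
 *		.cpu_powerdown_prepare		= my_cpu_powerdown_prepare,
 *		.cluster_powerdown_prepare	= my_cluster_powerdown_prepare,
 *		.cpu_cache_disable		= my_cpu_cache_disable,
 *		.cluster_cache_disable		= my_cluster_cache_disable,
 *	};
 *
 *	ret = mcpm_platform_register(&my_pm_ops);
 *
 * Only one backend may register; a second call returns -EBUSY.
 */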

/*
 * We can't use regular spinlocks. In the switcher case, it is possible
 * for an outbound CPU to call power_down() after its inbound counterpart
 * is already live using the same logical CPU number which trips lockdep
 * debugging.
 */
static arch_spinlock_t mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;

static int mcpm_cpu_use_count[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];

static inline bool mcpm_cluster_unused(unsigned int cluster)
{
	int i, cnt;
	for (i = 0, cnt = 0; i < MAX_CPUS_PER_CLUSTER; i++)
		cnt |= mcpm_cpu_use_count[cluster][i];
	return !cnt;
}

int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
{
	bool cpu_is_down, cluster_is_down;
	int ret = 0;

	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
	if (!platform_ops)
		return -EUNATCH; /* try not to shadow power_up errors */
	might_sleep();

	/*
	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
	 * variant exists, we need to disable IRQs manually here.
	 */
	local_irq_disable();
	arch_spin_lock(&mcpm_lock);

	cpu_is_down = !mcpm_cpu_use_count[cluster][cpu];
	cluster_is_down = mcpm_cluster_unused(cluster);

	mcpm_cpu_use_count[cluster][cpu]++;
	/*
	 * The only possible values are:
	 * 0 = CPU down
	 * 1 = CPU (still) up
	 * 2 = CPU requested to be up before it had a chance
	 *     to actually make itself down.
	 * Any other value is a bug.
	 */
	BUG_ON(mcpm_cpu_use_count[cluster][cpu] != 1 &&
	       mcpm_cpu_use_count[cluster][cpu] != 2);

	if (cluster_is_down)
		ret = platform_ops->cluster_powerup(cluster);
	if (cpu_is_down && !ret)
		ret = platform_ops->cpu_powerup(cpu, cluster);

	arch_spin_unlock(&mcpm_lock);
	local_irq_enable();
	return ret;
}

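/*
 * Function type of cpu_reset(), used below to branch to cpu_reset()
 * through its physical address once setup_mm_for_reboot() has switched
 * to the identity mapping.
 */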
typedef typeof(cpu_reset) phys_reset_t;

void mcpm_cpu_power_down(void)
{
	unsigned int mpidr, cpu, cluster;
	bool cpu_going_down, last_man;
	phys_reset_t phys_reset;

	mpidr = read_cpuid_mpidr();
	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
	if (WARN_ON_ONCE(!platform_ops))
		return;
	BUG_ON(!irqs_disabled());

	setup_mm_for_reboot();

	__mcpm_cpu_going_down(cpu, cluster);
	arch_spin_lock(&mcpm_lock);
	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);

	mcpm_cpu_use_count[cluster][cpu]--;
	BUG_ON(mcpm_cpu_use_count[cluster][cpu] != 0 &&
	       mcpm_cpu_use_count[cluster][cpu] != 1);
	cpu_going_down = !mcpm_cpu_use_count[cluster][cpu];
	last_man = mcpm_cluster_unused(cluster);

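	/*
	 * If we are the last CPU still using the cluster and we win the race
	 * against any inbound CPU, take the whole cluster down; otherwise
	 * only prepare this CPU for power-down.
	 */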
	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
		platform_ops->cpu_powerdown_prepare(cpu, cluster);
		platform_ops->cluster_powerdown_prepare(cluster);
		arch_spin_unlock(&mcpm_lock);
		platform_ops->cluster_cache_disable();
		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
	} else {
		if (cpu_going_down)
			platform_ops->cpu_powerdown_prepare(cpu, cluster);
		arch_spin_unlock(&mcpm_lock);
		/*
		 * If cpu_going_down is false here, that means a power_up
		 * request raced ahead of us. Even if we do not want to
		 * shut this CPU down, the caller still expects execution
		 * to return through the system resume entry path, like
		 * when the WFI is aborted due to a new IRQ or the like..
		 * So let's continue with cache cleaning in all cases.
		 */
		platform_ops->cpu_cache_disable();
	}

	__mcpm_cpu_down(cpu, cluster);

	/* Now we are prepared for power-down, do it: */
	if (cpu_going_down)
		wfi();

	/*
	 * It is possible for a power_up request to happen concurrently
	 * with a power_down request for the same CPU. In this case the
	 * CPU might not be able to actually enter a powered down state
	 * with the WFI instruction if the power_up request has removed
	 * the required reset condition. We must perform a re-entry in
	 * the kernel as if the power_up method just had deasserted reset
	 * on the CPU.
	 */
	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
	phys_reset(__pa_symbol(mcpm_entry_point), false);

	/* should never get here */
	BUG();
}

int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster)
{
	int ret;

	if (WARN_ON_ONCE(!platform_ops || !platform_ops->wait_for_powerdown))
		return -EUNATCH;

	ret = platform_ops->wait_for_powerdown(cpu, cluster);
	if (ret)
		pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n",
			__func__, cpu, cluster, ret);

	return ret;
}

void mcpm_cpu_suspend(void)
{
	if (WARN_ON_ONCE(!platform_ops))
		return;

	/* Some platforms might have to enable special resume modes, etc. */
	if (platform_ops->cpu_suspend_prepare) {
		unsigned int mpidr = read_cpuid_mpidr();
		unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
		unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
		arch_spin_lock(&mcpm_lock);
		platform_ops->cpu_suspend_prepare(cpu, cluster);
		arch_spin_unlock(&mcpm_lock);
	}
	mcpm_cpu_power_down();
}

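/*
 * Called on a CPU once it is up and running again (it reads its own MPIDR
 * to find out which cpu/cluster it is), to bring the use counts back in
 * sync and to invoke the backend's cpu_is_up/cluster_is_up hooks.
 */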
int mcpm_cpu_powered_up(void)
{
	unsigned int mpidr, cpu, cluster;
	bool cpu_was_down, first_man;
	unsigned long flags;

	if (!platform_ops)
		return -EUNATCH;

	mpidr = read_cpuid_mpidr();
	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	local_irq_save(flags);
	arch_spin_lock(&mcpm_lock);

	cpu_was_down = !mcpm_cpu_use_count[cluster][cpu];
	first_man = mcpm_cluster_unused(cluster);

	if (first_man && platform_ops->cluster_is_up)
		platform_ops->cluster_is_up(cluster);
	if (cpu_was_down)
		mcpm_cpu_use_count[cluster][cpu] = 1;
	if (platform_ops->cpu_is_up)
		platform_ops->cpu_is_up(cpu, cluster);

	arch_spin_unlock(&mcpm_lock);
	local_irq_restore(flags);

	return 0;
}

#ifdef CONFIG_ARM_CPU_SUSPEND

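/*
 * Runs via cpu_suspend() with the identity mapping installed by
 * setup_mm_for_reboot(): it turns the caches off through the provided
 * callback, walks the same down/critical-section steps as a real
 * power-down, then "resets" into mcpm_entry_point so this CPU resumes
 * through the normal MCPM entry path (cpu_resume_no_hyp).
 */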
static int __init nocache_trampoline(unsigned long _arg)
{
	void (*cache_disable)(void) = (void *)_arg;
	unsigned int mpidr = read_cpuid_mpidr();
	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	phys_reset_t phys_reset;

	mcpm_set_entry_vector(cpu, cluster, cpu_resume_no_hyp);
	setup_mm_for_reboot();

	__mcpm_cpu_going_down(cpu, cluster);
	BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
	cache_disable();
	__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
	__mcpm_cpu_down(cpu, cluster);

	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
	phys_reset(__pa_symbol(mcpm_entry_point), false);
	BUG();
}

int __init mcpm_loopback(void (*cache_disable)(void))
{
	int ret;

	/*
	 * We're going to soft-restart the current CPU through the
	 * low-level MCPM code by leveraging the suspend/resume
	 * infrastructure. Let's play it safe by using cpu_pm_enter()
	 * in case the CPU init code path resets the VFP or similar.
	 */
	local_irq_disable();
	local_fiq_disable();
	ret = cpu_pm_enter();
	if (!ret) {
		ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
		cpu_pm_exit();
	}
	local_fiq_enable();
	local_irq_enable();
	if (ret)
		pr_err("%s returned %d\n", __func__, ret);
	return ret;
}

#endif

extern unsigned long mcpm_power_up_setup_phys;

int __init mcpm_sync_init(
	void (*power_up_setup)(unsigned int affinity_level))
{
	unsigned int i, j, mpidr, this_cluster;

	BUILD_BUG_ON(MCPM_SYNC_CLUSTER_SIZE * MAX_NR_CLUSTERS != sizeof mcpm_sync);
	BUG_ON((unsigned long)&mcpm_sync & (__CACHE_WRITEBACK_GRANULE - 1));

	/*
	 * Set initial CPU and cluster states.
	 * Only one cluster is assumed to be active at this point.
	 */
	for (i = 0; i < MAX_NR_CLUSTERS; i++) {
		mcpm_sync.clusters[i].cluster = CLUSTER_DOWN;
		mcpm_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP;
		for (j = 0; j < MAX_CPUS_PER_CLUSTER; j++)
			mcpm_sync.clusters[i].cpus[j].cpu = CPU_DOWN;
	}
	mpidr = read_cpuid_mpidr();
	this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	for_each_online_cpu(i) {
		mcpm_cpu_use_count[this_cluster][i] = 1;
		mcpm_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP;
	}
	mcpm_sync.clusters[this_cluster].cluster = CLUSTER_UP;
	sync_cache_w(&mcpm_sync);

	if (power_up_setup) {
		mcpm_power_up_setup_phys = __pa_symbol(power_up_setup);
		sync_cache_w(&mcpm_power_up_setup_phys);
	}

	return 0;
}