1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/syscore_ops.h> |
3 | #include <linux/suspend.h> |
4 | #include <linux/cpu.h> |
5 | |
6 | #include <asm/msr.h> |
7 | #include <asm/mwait.h> |
8 | |
9 | #define UMWAIT_C02_ENABLE 0 |
10 | |
11 | #define UMWAIT_CTRL_VAL(max_time, c02_disable) \ |
12 | (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \ |
13 | ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE)) |
14 | |
15 | /* |
16 | * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, |
17 | * umwait max time is 100000 in TSC-quanta and C0.2 is enabled |
18 | */ |
19 | static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); |
20 | |
21 | /* |
22 | * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by |
23 | * hardware or BIOS before kernel boot. |
24 | */ |
25 | static u32 orig_umwait_control_cached __ro_after_init; |
26 | |
27 | /* |
28 | * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in |
29 | * the sysfs write functions. |
30 | */ |
31 | static DEFINE_MUTEX(umwait_lock); |
32 | |
33 | static void umwait_update_control_msr(void * unused) |
34 | { |
35 | lockdep_assert_irqs_disabled(); |
36 | wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0); |
37 | } |
38 | |
39 | /* |
40 | * The CPU hotplug callback sets the control MSR to the global control |
41 | * value. |
42 | * |
43 | * Disable interrupts so the read of umwait_control_cached and the WRMSR |
44 | * are protected against a concurrent sysfs write. Otherwise the sysfs |
45 | * write could update the cached value after it had been read on this CPU |
46 | * and issue the IPI before the old value had been written. The IPI would |
47 | * interrupt, write the new value and after return from IPI the previous |
48 | * value would be written by this CPU. |
49 | * |
50 | * With interrupts disabled the upcoming CPU either sees the new control |
51 | * value or the IPI is updating this CPU to the new control value after |
52 | * interrupts have been reenabled. |
53 | */ |
static int umwait_cpu_online(unsigned int cpu)
{
	/*
	 * IRQs off: keeps the read of umwait_control_cached and the WRMSR
	 * atomic with respect to a concurrent sysfs-triggered IPI update
	 * (full reasoning in the comment block above this function).
	 */
	local_irq_disable();
	umwait_update_control_msr(NULL);
	local_irq_enable();
	return 0;
}
61 | |
62 | /* |
63 | * The CPU hotplug callback sets the control MSR to the original control |
64 | * value. |
65 | */ |
static int umwait_cpu_offline(unsigned int cpu)
{
	/*
	 * This code is protected by the CPU hotplug already and
	 * orig_umwait_control_cached is never changed after it caches
	 * the original control MSR value in umwait_init(). So there
	 * is no race condition here.
	 */
	/* Restore the firmware/BIOS-provided value captured at boot. */
	wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);

	return 0;
}
78 | |
79 | /* |
80 | * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which |
81 | * is the only active CPU at this time. The MSR is set up on the APs via the |
82 | * CPU hotplug callback. |
83 | * |
84 | * This function is invoked on resume from suspend and hibernation. On |
85 | * resume from suspend the restore should be not required, but we neither |
86 | * trust the firmware nor does it matter if the same value is written |
87 | * again. |
88 | */ |
static void umwait_syscore_resume(void)
{
	/*
	 * Only the boot CPU is running here; the APs get the MSR via the
	 * hotplug online callback. syscore resume runs with IRQs disabled,
	 * satisfying the helper's lockdep assertion.
	 */
	umwait_update_control_msr(NULL);
}

/* Restore the MSR on resume from suspend/hibernation (boot CPU only). */
static struct syscore_ops umwait_syscore_ops = {
	.resume	= umwait_syscore_resume,
};
97 | |
98 | /* sysfs interface */ |
99 | |
100 | /* |
101 | * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled. |
102 | * Otherwise, C0.2 is enabled. |
103 | */ |
104 | static inline bool umwait_ctrl_c02_enabled(u32 ctrl) |
105 | { |
106 | return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE); |
107 | } |
108 | |
109 | static inline u32 umwait_ctrl_max_time(u32 ctrl) |
110 | { |
111 | return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; |
112 | } |
113 | |
114 | static inline void umwait_update_control(u32 maxtime, bool c02_enable) |
115 | { |
116 | u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; |
117 | |
118 | if (!c02_enable) |
119 | ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE; |
120 | |
121 | WRITE_ONCE(umwait_control_cached, ctrl); |
122 | /* Propagate to all CPUs */ |
123 | on_each_cpu(func: umwait_update_control_msr, NULL, wait: 1); |
124 | } |
125 | |
126 | static ssize_t |
127 | enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) |
128 | { |
129 | u32 ctrl = READ_ONCE(umwait_control_cached); |
130 | |
131 | return sprintf(buf, fmt: "%d\n" , umwait_ctrl_c02_enabled(ctrl)); |
132 | } |
133 | |
134 | static ssize_t enable_c02_store(struct device *dev, |
135 | struct device_attribute *attr, |
136 | const char *buf, size_t count) |
137 | { |
138 | bool c02_enable; |
139 | u32 ctrl; |
140 | int ret; |
141 | |
142 | ret = kstrtobool(s: buf, res: &c02_enable); |
143 | if (ret) |
144 | return ret; |
145 | |
146 | mutex_lock(&umwait_lock); |
147 | |
148 | ctrl = READ_ONCE(umwait_control_cached); |
149 | if (c02_enable != umwait_ctrl_c02_enabled(ctrl)) |
150 | umwait_update_control(maxtime: ctrl, c02_enable); |
151 | |
152 | mutex_unlock(lock: &umwait_lock); |
153 | |
154 | return count; |
155 | } |
156 | static DEVICE_ATTR_RW(enable_c02); |
157 | |
158 | static ssize_t |
159 | max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) |
160 | { |
161 | u32 ctrl = READ_ONCE(umwait_control_cached); |
162 | |
163 | return sprintf(buf, fmt: "%u\n" , umwait_ctrl_max_time(ctrl)); |
164 | } |
165 | |
166 | static ssize_t max_time_store(struct device *kobj, |
167 | struct device_attribute *attr, |
168 | const char *buf, size_t count) |
169 | { |
170 | u32 max_time, ctrl; |
171 | int ret; |
172 | |
173 | ret = kstrtou32(s: buf, base: 0, res: &max_time); |
174 | if (ret) |
175 | return ret; |
176 | |
177 | /* bits[1:0] must be zero */ |
178 | if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK) |
179 | return -EINVAL; |
180 | |
181 | mutex_lock(&umwait_lock); |
182 | |
183 | ctrl = READ_ONCE(umwait_control_cached); |
184 | if (max_time != umwait_ctrl_max_time(ctrl)) |
185 | umwait_update_control(maxtime: max_time, c02_enable: umwait_ctrl_c02_enabled(ctrl)); |
186 | |
187 | mutex_unlock(lock: &umwait_lock); |
188 | |
189 | return count; |
190 | } |
191 | static DEVICE_ATTR_RW(max_time); |
192 | |
193 | static struct attribute *umwait_attrs[] = { |
194 | &dev_attr_enable_c02.attr, |
195 | &dev_attr_max_time.attr, |
196 | NULL |
197 | }; |
198 | |
199 | static struct attribute_group umwait_attr_group = { |
200 | .attrs = umwait_attrs, |
201 | .name = "umwait_control" , |
202 | }; |
203 | |
204 | static int __init umwait_init(void) |
205 | { |
206 | struct device *dev; |
207 | int ret; |
208 | |
209 | if (!boot_cpu_has(X86_FEATURE_WAITPKG)) |
210 | return -ENODEV; |
211 | |
212 | /* |
213 | * Cache the original control MSR value before the control MSR is |
214 | * changed. This is the only place where orig_umwait_control_cached |
215 | * is modified. |
216 | */ |
217 | rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached); |
218 | |
219 | ret = cpuhp_setup_state(state: CPUHP_AP_ONLINE_DYN, name: "umwait:online" , |
220 | startup: umwait_cpu_online, teardown: umwait_cpu_offline); |
221 | if (ret < 0) { |
222 | /* |
223 | * On failure, the control MSR on all CPUs has the |
224 | * original control value. |
225 | */ |
226 | return ret; |
227 | } |
228 | |
229 | register_syscore_ops(ops: &umwait_syscore_ops); |
230 | |
231 | /* |
232 | * Add umwait control interface. Ignore failure, so at least the |
233 | * default values are set up in case the machine manages to boot. |
234 | */ |
235 | dev = bus_get_dev_root(bus: &cpu_subsys); |
236 | if (dev) { |
237 | ret = sysfs_create_group(kobj: &dev->kobj, grp: &umwait_attr_group); |
238 | put_device(dev); |
239 | } |
240 | return ret; |
241 | } |
242 | device_initcall(umwait_init); |
243 | |