| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * amd-pstate.c - AMD Processor P-state Frequency Driver |
| 4 | * |
| 5 | * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. |
| 6 | * |
| 7 | * Author: Huang Rui <ray.huang@amd.com> |
| 8 | * |
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * provides a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish the two types.
| 21 | */ |
| 22 | |
| 23 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 24 | |
| 25 | #include <linux/bitfield.h> |
| 26 | #include <linux/kernel.h> |
| 27 | #include <linux/module.h> |
| 28 | #include <linux/init.h> |
| 29 | #include <linux/smp.h> |
| 30 | #include <linux/sched.h> |
| 31 | #include <linux/cpufreq.h> |
| 32 | #include <linux/compiler.h> |
| 33 | #include <linux/dmi.h> |
| 34 | #include <linux/slab.h> |
| 35 | #include <linux/acpi.h> |
| 36 | #include <linux/io.h> |
| 37 | #include <linux/delay.h> |
| 38 | #include <linux/uaccess.h> |
| 39 | #include <linux/static_call.h> |
| 40 | #include <linux/topology.h> |
| 41 | |
| 42 | #include <acpi/processor.h> |
| 43 | #include <acpi/cppc_acpi.h> |
| 44 | |
| 45 | #include <asm/msr.h> |
| 46 | #include <asm/processor.h> |
| 47 | #include <asm/cpufeature.h> |
| 48 | #include <asm/cpu_device_id.h> |
| 49 | |
| 50 | #include "amd-pstate.h" |
| 51 | #include "amd-pstate-trace.h" |
| 52 | |
/*
 * Fallback returned by amd_pstate_get_transition_latency() when
 * cppc_get_transition_latency() fails; same unit as that helper's result
 * (nanoseconds, given that the delay helper divides it by NSEC_PER_USEC).
 */
#define AMD_PSTATE_TRANSITION_LATENCY		20000
/*
 * Fallbacks returned by amd_pstate_get_transition_delay_us(), in microseconds.
 * The FAST_CPPC value is used when X86_FEATURE_AMD_FAST_CPPC is enabled.
 */
#define AMD_PSTATE_TRANSITION_DELAY		1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600

/* EPP values stored in epp_values[] for the named energy/performance profiles. */
#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF
| 61 | |
| 62 | static const char * const amd_pstate_mode_string[] = { |
| 63 | [AMD_PSTATE_UNDEFINED] = "undefined" , |
| 64 | [AMD_PSTATE_DISABLE] = "disable" , |
| 65 | [AMD_PSTATE_PASSIVE] = "passive" , |
| 66 | [AMD_PSTATE_ACTIVE] = "active" , |
| 67 | [AMD_PSTATE_GUIDED] = "guided" , |
| 68 | }; |
| 69 | static_assert(ARRAY_SIZE(amd_pstate_mode_string) == AMD_PSTATE_MAX); |
| 70 | |
| 71 | const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode) |
| 72 | { |
| 73 | if (mode < AMD_PSTATE_UNDEFINED || mode >= AMD_PSTATE_MAX) |
| 74 | mode = AMD_PSTATE_UNDEFINED; |
| 75 | return amd_pstate_mode_string[mode]; |
| 76 | } |
| 77 | EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string); |
| 78 | |
| 79 | struct quirk_entry { |
| 80 | u32 nominal_freq; |
| 81 | u32 lowest_freq; |
| 82 | }; |
| 83 | |
| 84 | static struct cpufreq_driver *current_pstate_driver; |
| 85 | static struct cpufreq_driver amd_pstate_driver; |
| 86 | static struct cpufreq_driver amd_pstate_epp_driver; |
| 87 | static int cppc_state = AMD_PSTATE_UNDEFINED; |
| 88 | static bool amd_pstate_prefcore = true; |
| 89 | static struct quirk_entry *quirks; |
| 90 | |
/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate at during
 * short periods of activity. EPP values are used for the
 * different OS profiles (balanced, performance, power savings).
 * The display string for each EPP index is stored in
 * energy_perf_strings[]:
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
/* Index shared by energy_perf_strings[] and epp_values[]. */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
	EPP_INDEX_MAX,		/* number of entries, not a valid index */
};
| 115 | |
| 116 | static const char * const energy_perf_strings[] = { |
| 117 | [EPP_INDEX_DEFAULT] = "default" , |
| 118 | [EPP_INDEX_PERFORMANCE] = "performance" , |
| 119 | [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance" , |
| 120 | [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power" , |
| 121 | [EPP_INDEX_POWERSAVE] = "power" , |
| 122 | }; |
| 123 | static_assert(ARRAY_SIZE(energy_perf_strings) == EPP_INDEX_MAX); |
| 124 | |
| 125 | static unsigned int epp_values[] = { |
| 126 | [EPP_INDEX_DEFAULT] = 0, |
| 127 | [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE, |
| 128 | [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE, |
| 129 | [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE, |
| 130 | [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE, |
| 131 | }; |
| 132 | static_assert(ARRAY_SIZE(epp_values) == EPP_INDEX_MAX); |
| 133 | |
/*
 * Callback type: takes one int and returns an int. Its users are outside this
 * section (presumably mode-switch handlers, going by the name - TODO confirm).
 */
typedef int (*cppc_mode_transition_fn)(int);
| 135 | |
| 136 | static struct quirk_entry quirk_amd_7k62 = { |
| 137 | .nominal_freq = 2600, |
| 138 | .lowest_freq = 550, |
| 139 | }; |
| 140 | |
| 141 | static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) |
| 142 | { |
| 143 | u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); |
| 144 | |
| 145 | return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); |
| 146 | } |
| 147 | |
| 148 | static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) |
| 149 | { |
| 150 | return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, |
| 151 | perf.nominal_perf); |
| 152 | } |
| 153 | |
| 154 | static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) |
| 155 | { |
| 156 | /** |
| 157 | * match the broken bios for family 17h processor support CPPC V2 |
| 158 | * broken BIOS lack of nominal_freq and lowest_freq capabilities |
| 159 | * definition in ACPI tables |
| 160 | */ |
| 161 | if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { |
| 162 | quirks = dmi->driver_data; |
| 163 | pr_info("Overriding nominal and lowest frequencies for %s\n" , dmi->ident); |
| 164 | return 1; |
| 165 | } |
| 166 | |
| 167 | return 0; |
| 168 | } |
| 169 | |
| 170 | static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { |
| 171 | { |
| 172 | .callback = dmi_matched_7k62_bios_bug, |
| 173 | .ident = "AMD EPYC 7K62" , |
| 174 | .matches = { |
| 175 | DMI_MATCH(DMI_BIOS_VERSION, "5.14" ), |
| 176 | DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019" ), |
| 177 | }, |
| 178 | .driver_data = &quirk_amd_7k62, |
| 179 | }, |
| 180 | {} |
| 181 | }; |
| 182 | MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); |
| 183 | |
| 184 | static inline int get_mode_idx_from_str(const char *str, size_t size) |
| 185 | { |
| 186 | int i; |
| 187 | |
| 188 | for (i = 0; i < AMD_PSTATE_MAX; i++) { |
| 189 | if (!strncmp(str, amd_pstate_mode_string[i], size)) |
| 190 | return i; |
| 191 | } |
| 192 | return -EINVAL; |
| 193 | } |
| 194 | |
/* Driver-wide mutex; the code that takes it is outside this section. */
static DEFINE_MUTEX(amd_pstate_driver_lock);
| 196 | |
| 197 | static u8 msr_get_epp(struct amd_cpudata *cpudata) |
| 198 | { |
| 199 | u64 value; |
| 200 | int ret; |
| 201 | |
| 202 | ret = rdmsrq_on_cpu(cpu: cpudata->cpu, MSR_AMD_CPPC_REQ, q: &value); |
| 203 | if (ret < 0) { |
| 204 | pr_debug("Could not retrieve energy perf value (%d)\n" , ret); |
| 205 | return ret; |
| 206 | } |
| 207 | |
| 208 | return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); |
| 209 | } |
| 210 | |
| 211 | DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); |
| 212 | |
| 213 | static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) |
| 214 | { |
| 215 | return static_call(amd_pstate_get_epp)(cpudata); |
| 216 | } |
| 217 | |
| 218 | static u8 shmem_get_epp(struct amd_cpudata *cpudata) |
| 219 | { |
| 220 | u64 epp; |
| 221 | int ret; |
| 222 | |
| 223 | ret = cppc_get_epp_perf(cpunum: cpudata->cpu, epp_perf: &epp); |
| 224 | if (ret < 0) { |
| 225 | pr_debug("Could not retrieve energy perf value (%d)\n" , ret); |
| 226 | return ret; |
| 227 | } |
| 228 | |
| 229 | return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); |
| 230 | } |
| 231 | |
| 232 | static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, |
| 233 | u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) |
| 234 | { |
| 235 | struct amd_cpudata *cpudata = policy->driver_data; |
| 236 | u64 value, prev; |
| 237 | |
| 238 | value = prev = READ_ONCE(cpudata->cppc_req_cached); |
| 239 | |
| 240 | value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | |
| 241 | AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); |
| 242 | value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); |
| 243 | value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); |
| 244 | value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); |
| 245 | value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); |
| 246 | |
| 247 | if (trace_amd_pstate_epp_perf_enabled()) { |
| 248 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 249 | |
| 250 | trace_amd_pstate_epp_perf(cpu_id: cpudata->cpu, |
| 251 | highest_perf: perf.highest_perf, |
| 252 | epp, |
| 253 | min_perf, |
| 254 | max_perf, |
| 255 | boost: policy->boost_enabled, |
| 256 | changed: value != prev); |
| 257 | } |
| 258 | |
| 259 | if (value == prev) |
| 260 | return 0; |
| 261 | |
| 262 | if (fast_switch) { |
| 263 | wrmsrq(MSR_AMD_CPPC_REQ, val: value); |
| 264 | return 0; |
| 265 | } else { |
| 266 | int ret = wrmsrq_on_cpu(cpu: cpudata->cpu, MSR_AMD_CPPC_REQ, q: value); |
| 267 | |
| 268 | if (ret) |
| 269 | return ret; |
| 270 | } |
| 271 | |
| 272 | WRITE_ONCE(cpudata->cppc_req_cached, value); |
| 273 | |
| 274 | return 0; |
| 275 | } |
| 276 | |
| 277 | DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); |
| 278 | |
| 279 | static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, |
| 280 | u8 min_perf, u8 des_perf, |
| 281 | u8 max_perf, u8 epp, |
| 282 | bool fast_switch) |
| 283 | { |
| 284 | return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, |
| 285 | max_perf, epp, fast_switch); |
| 286 | } |
| 287 | |
| 288 | static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) |
| 289 | { |
| 290 | struct amd_cpudata *cpudata = policy->driver_data; |
| 291 | u64 value, prev; |
| 292 | int ret; |
| 293 | |
| 294 | value = prev = READ_ONCE(cpudata->cppc_req_cached); |
| 295 | value &= ~AMD_CPPC_EPP_PERF_MASK; |
| 296 | value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); |
| 297 | |
| 298 | if (trace_amd_pstate_epp_perf_enabled()) { |
| 299 | union perf_cached perf = cpudata->perf; |
| 300 | |
| 301 | trace_amd_pstate_epp_perf(cpu_id: cpudata->cpu, highest_perf: perf.highest_perf, |
| 302 | epp, |
| 303 | FIELD_GET(AMD_CPPC_MIN_PERF_MASK, |
| 304 | cpudata->cppc_req_cached), |
| 305 | FIELD_GET(AMD_CPPC_MAX_PERF_MASK, |
| 306 | cpudata->cppc_req_cached), |
| 307 | boost: policy->boost_enabled, |
| 308 | changed: value != prev); |
| 309 | } |
| 310 | |
| 311 | if (value == prev) |
| 312 | return 0; |
| 313 | |
| 314 | ret = wrmsrq_on_cpu(cpu: cpudata->cpu, MSR_AMD_CPPC_REQ, q: value); |
| 315 | if (ret) { |
| 316 | pr_err("failed to set energy perf value (%d)\n" , ret); |
| 317 | return ret; |
| 318 | } |
| 319 | |
| 320 | /* update both so that msr_update_perf() can effectively check */ |
| 321 | WRITE_ONCE(cpudata->cppc_req_cached, value); |
| 322 | |
| 323 | return ret; |
| 324 | } |
| 325 | |
| 326 | DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); |
| 327 | |
| 328 | static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) |
| 329 | { |
| 330 | return static_call(amd_pstate_set_epp)(policy, epp); |
| 331 | } |
| 332 | |
| 333 | static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) |
| 334 | { |
| 335 | struct amd_cpudata *cpudata = policy->driver_data; |
| 336 | struct cppc_perf_ctrls perf_ctrls; |
| 337 | u8 epp_cached; |
| 338 | u64 value; |
| 339 | int ret; |
| 340 | |
| 341 | |
| 342 | epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); |
| 343 | if (trace_amd_pstate_epp_perf_enabled()) { |
| 344 | union perf_cached perf = cpudata->perf; |
| 345 | |
| 346 | trace_amd_pstate_epp_perf(cpu_id: cpudata->cpu, highest_perf: perf.highest_perf, |
| 347 | epp, |
| 348 | FIELD_GET(AMD_CPPC_MIN_PERF_MASK, |
| 349 | cpudata->cppc_req_cached), |
| 350 | FIELD_GET(AMD_CPPC_MAX_PERF_MASK, |
| 351 | cpudata->cppc_req_cached), |
| 352 | boost: policy->boost_enabled, |
| 353 | changed: epp != epp_cached); |
| 354 | } |
| 355 | |
| 356 | if (epp == epp_cached) |
| 357 | return 0; |
| 358 | |
| 359 | perf_ctrls.energy_perf = epp; |
| 360 | ret = cppc_set_epp_perf(cpu: cpudata->cpu, perf_ctrls: &perf_ctrls, enable: 1); |
| 361 | if (ret) { |
| 362 | pr_debug("failed to set energy perf value (%d)\n" , ret); |
| 363 | return ret; |
| 364 | } |
| 365 | |
| 366 | value = READ_ONCE(cpudata->cppc_req_cached); |
| 367 | value &= ~AMD_CPPC_EPP_PERF_MASK; |
| 368 | value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); |
| 369 | WRITE_ONCE(cpudata->cppc_req_cached, value); |
| 370 | |
| 371 | return ret; |
| 372 | } |
| 373 | |
| 374 | static inline int msr_cppc_enable(struct cpufreq_policy *policy) |
| 375 | { |
| 376 | return wrmsrq_safe_on_cpu(cpu: policy->cpu, MSR_AMD_CPPC_ENABLE, q: 1); |
| 377 | } |
| 378 | |
| 379 | static int shmem_cppc_enable(struct cpufreq_policy *policy) |
| 380 | { |
| 381 | return cppc_set_enable(cpu: policy->cpu, enable: 1); |
| 382 | } |
| 383 | |
| 384 | DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); |
| 385 | |
| 386 | static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) |
| 387 | { |
| 388 | return static_call(amd_pstate_cppc_enable)(policy); |
| 389 | } |
| 390 | |
| 391 | static int msr_init_perf(struct amd_cpudata *cpudata) |
| 392 | { |
| 393 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 394 | u64 cap1, numerator, cppc_req; |
| 395 | u8 min_perf; |
| 396 | |
| 397 | int ret = rdmsrq_safe_on_cpu(cpu: cpudata->cpu, MSR_AMD_CPPC_CAP1, |
| 398 | q: &cap1); |
| 399 | if (ret) |
| 400 | return ret; |
| 401 | |
| 402 | ret = amd_get_boost_ratio_numerator(cpu: cpudata->cpu, numerator: &numerator); |
| 403 | if (ret) |
| 404 | return ret; |
| 405 | |
| 406 | ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req); |
| 407 | if (ret) |
| 408 | return ret; |
| 409 | |
| 410 | WRITE_ONCE(cpudata->cppc_req_cached, cppc_req); |
| 411 | min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req); |
| 412 | |
| 413 | /* |
| 414 | * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an |
| 415 | * indication that the min_perf value is the one specified through the BIOS option |
| 416 | */ |
| 417 | cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK); |
| 418 | |
| 419 | if (!cppc_req) |
| 420 | perf.bios_min_perf = min_perf; |
| 421 | |
| 422 | perf.highest_perf = numerator; |
| 423 | perf.max_limit_perf = numerator; |
| 424 | perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); |
| 425 | perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); |
| 426 | perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); |
| 427 | perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); |
| 428 | WRITE_ONCE(cpudata->perf, perf); |
| 429 | WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); |
| 430 | |
| 431 | return 0; |
| 432 | } |
| 433 | |
| 434 | static int shmem_init_perf(struct amd_cpudata *cpudata) |
| 435 | { |
| 436 | struct cppc_perf_caps cppc_perf; |
| 437 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 438 | u64 numerator; |
| 439 | bool auto_sel; |
| 440 | |
| 441 | int ret = cppc_get_perf_caps(cpu: cpudata->cpu, caps: &cppc_perf); |
| 442 | if (ret) |
| 443 | return ret; |
| 444 | |
| 445 | ret = amd_get_boost_ratio_numerator(cpu: cpudata->cpu, numerator: &numerator); |
| 446 | if (ret) |
| 447 | return ret; |
| 448 | |
| 449 | perf.highest_perf = numerator; |
| 450 | perf.max_limit_perf = numerator; |
| 451 | perf.min_limit_perf = cppc_perf.lowest_perf; |
| 452 | perf.nominal_perf = cppc_perf.nominal_perf; |
| 453 | perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; |
| 454 | perf.lowest_perf = cppc_perf.lowest_perf; |
| 455 | WRITE_ONCE(cpudata->perf, perf); |
| 456 | WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); |
| 457 | |
| 458 | if (cppc_state == AMD_PSTATE_ACTIVE) |
| 459 | return 0; |
| 460 | |
| 461 | ret = cppc_get_auto_sel(cpu: cpudata->cpu, enable: &auto_sel); |
| 462 | if (ret) { |
| 463 | pr_warn("failed to get auto_sel, ret: %d\n" , ret); |
| 464 | return 0; |
| 465 | } |
| 466 | |
| 467 | ret = cppc_set_auto_sel(cpu: cpudata->cpu, |
| 468 | enable: (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); |
| 469 | |
| 470 | if (ret) |
| 471 | pr_warn("failed to set auto_sel, ret: %d\n" , ret); |
| 472 | |
| 473 | return ret; |
| 474 | } |
| 475 | |
| 476 | DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); |
| 477 | |
| 478 | static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) |
| 479 | { |
| 480 | return static_call(amd_pstate_init_perf)(cpudata); |
| 481 | } |
| 482 | |
| 483 | static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, |
| 484 | u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) |
| 485 | { |
| 486 | struct amd_cpudata *cpudata = policy->driver_data; |
| 487 | struct cppc_perf_ctrls perf_ctrls; |
| 488 | u64 value, prev; |
| 489 | int ret; |
| 490 | |
| 491 | if (cppc_state == AMD_PSTATE_ACTIVE) { |
| 492 | int ret = shmem_set_epp(policy, epp); |
| 493 | |
| 494 | if (ret) |
| 495 | return ret; |
| 496 | } |
| 497 | |
| 498 | value = prev = READ_ONCE(cpudata->cppc_req_cached); |
| 499 | |
| 500 | value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | |
| 501 | AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); |
| 502 | value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); |
| 503 | value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); |
| 504 | value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); |
| 505 | value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); |
| 506 | |
| 507 | if (trace_amd_pstate_epp_perf_enabled()) { |
| 508 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 509 | |
| 510 | trace_amd_pstate_epp_perf(cpu_id: cpudata->cpu, |
| 511 | highest_perf: perf.highest_perf, |
| 512 | epp, |
| 513 | min_perf, |
| 514 | max_perf, |
| 515 | boost: policy->boost_enabled, |
| 516 | changed: value != prev); |
| 517 | } |
| 518 | |
| 519 | if (value == prev) |
| 520 | return 0; |
| 521 | |
| 522 | perf_ctrls.max_perf = max_perf; |
| 523 | perf_ctrls.min_perf = min_perf; |
| 524 | perf_ctrls.desired_perf = des_perf; |
| 525 | |
| 526 | ret = cppc_set_perf(cpu: cpudata->cpu, perf_ctrls: &perf_ctrls); |
| 527 | if (ret) |
| 528 | return ret; |
| 529 | |
| 530 | WRITE_ONCE(cpudata->cppc_req_cached, value); |
| 531 | |
| 532 | return 0; |
| 533 | } |
| 534 | |
| 535 | static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) |
| 536 | { |
| 537 | u64 aperf, mperf, tsc; |
| 538 | unsigned long flags; |
| 539 | |
| 540 | local_irq_save(flags); |
| 541 | rdmsrq(MSR_IA32_APERF, aperf); |
| 542 | rdmsrq(MSR_IA32_MPERF, mperf); |
| 543 | tsc = rdtsc(); |
| 544 | |
| 545 | if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { |
| 546 | local_irq_restore(flags); |
| 547 | return false; |
| 548 | } |
| 549 | |
| 550 | local_irq_restore(flags); |
| 551 | |
| 552 | cpudata->cur.aperf = aperf; |
| 553 | cpudata->cur.mperf = mperf; |
| 554 | cpudata->cur.tsc = tsc; |
| 555 | cpudata->cur.aperf -= cpudata->prev.aperf; |
| 556 | cpudata->cur.mperf -= cpudata->prev.mperf; |
| 557 | cpudata->cur.tsc -= cpudata->prev.tsc; |
| 558 | |
| 559 | cpudata->prev.aperf = aperf; |
| 560 | cpudata->prev.mperf = mperf; |
| 561 | cpudata->prev.tsc = tsc; |
| 562 | |
| 563 | cpudata->freq = div64_u64(dividend: (cpudata->cur.aperf * cpu_khz), divisor: cpudata->cur.mperf); |
| 564 | |
| 565 | return true; |
| 566 | } |
| 567 | |
| 568 | static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, |
| 569 | u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) |
| 570 | { |
| 571 | struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu: cpudata->cpu); |
| 572 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 573 | |
| 574 | if (!policy) |
| 575 | return; |
| 576 | |
| 577 | /* limit the max perf when core performance boost feature is disabled */ |
| 578 | if (!cpudata->boost_supported) |
| 579 | max_perf = min_t(u8, perf.nominal_perf, max_perf); |
| 580 | |
| 581 | des_perf = clamp_t(u8, des_perf, min_perf, max_perf); |
| 582 | |
| 583 | policy->cur = perf_to_freq(perf, nominal_freq: cpudata->nominal_freq, perf_val: des_perf); |
| 584 | |
| 585 | if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { |
| 586 | min_perf = des_perf; |
| 587 | des_perf = 0; |
| 588 | } |
| 589 | |
| 590 | if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { |
| 591 | trace_amd_pstate_perf(min_perf, target_perf: des_perf, capacity: max_perf, freq: cpudata->freq, |
| 592 | mperf: cpudata->cur.mperf, aperf: cpudata->cur.aperf, tsc: cpudata->cur.tsc, |
| 593 | cpu_id: cpudata->cpu, fast_switch); |
| 594 | } |
| 595 | |
| 596 | amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, epp: 0, fast_switch); |
| 597 | } |
| 598 | |
| 599 | static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) |
| 600 | { |
| 601 | /* |
| 602 | * Initialize lower frequency limit (i.e.policy->min) with |
| 603 | * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, |
| 604 | * Override the initial value set by cpufreq core and amd-pstate qos_requests. |
| 605 | */ |
| 606 | if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { |
| 607 | struct cpufreq_policy *policy __free(put_cpufreq_policy) = |
| 608 | cpufreq_cpu_get(cpu: policy_data->cpu); |
| 609 | struct amd_cpudata *cpudata; |
| 610 | union perf_cached perf; |
| 611 | |
| 612 | if (!policy) |
| 613 | return -EINVAL; |
| 614 | |
| 615 | cpudata = policy->driver_data; |
| 616 | perf = READ_ONCE(cpudata->perf); |
| 617 | |
| 618 | if (perf.bios_min_perf) |
| 619 | policy_data->min = perf_to_freq(perf, nominal_freq: cpudata->nominal_freq, |
| 620 | perf_val: perf.bios_min_perf); |
| 621 | else |
| 622 | policy_data->min = cpudata->lowest_nonlinear_freq; |
| 623 | } |
| 624 | |
| 625 | cpufreq_verify_within_cpu_limits(policy: policy_data); |
| 626 | |
| 627 | return 0; |
| 628 | } |
| 629 | |
| 630 | static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) |
| 631 | { |
| 632 | struct amd_cpudata *cpudata = policy->driver_data; |
| 633 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 634 | |
| 635 | perf.max_limit_perf = freq_to_perf(perf, nominal_freq: cpudata->nominal_freq, freq_val: policy->max); |
| 636 | WRITE_ONCE(cpudata->max_limit_freq, policy->max); |
| 637 | |
| 638 | if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { |
| 639 | perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); |
| 640 | WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); |
| 641 | } else { |
| 642 | perf.min_limit_perf = freq_to_perf(perf, nominal_freq: cpudata->nominal_freq, freq_val: policy->min); |
| 643 | WRITE_ONCE(cpudata->min_limit_freq, policy->min); |
| 644 | } |
| 645 | |
| 646 | WRITE_ONCE(cpudata->perf, perf); |
| 647 | } |
| 648 | |
| 649 | static int amd_pstate_update_freq(struct cpufreq_policy *policy, |
| 650 | unsigned int target_freq, bool fast_switch) |
| 651 | { |
| 652 | struct cpufreq_freqs freqs; |
| 653 | struct amd_cpudata *cpudata; |
| 654 | union perf_cached perf; |
| 655 | u8 des_perf; |
| 656 | |
| 657 | cpudata = policy->driver_data; |
| 658 | |
| 659 | if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) |
| 660 | amd_pstate_update_min_max_limit(policy); |
| 661 | |
| 662 | perf = READ_ONCE(cpudata->perf); |
| 663 | |
| 664 | freqs.old = policy->cur; |
| 665 | freqs.new = target_freq; |
| 666 | |
| 667 | des_perf = freq_to_perf(perf, nominal_freq: cpudata->nominal_freq, freq_val: target_freq); |
| 668 | |
| 669 | WARN_ON(fast_switch && !policy->fast_switch_enabled); |
| 670 | /* |
| 671 | * If fast_switch is desired, then there aren't any registered |
| 672 | * transition notifiers. See comment for |
| 673 | * cpufreq_enable_fast_switch(). |
| 674 | */ |
| 675 | if (!fast_switch) |
| 676 | cpufreq_freq_transition_begin(policy, freqs: &freqs); |
| 677 | |
| 678 | amd_pstate_update(cpudata, min_perf: perf.min_limit_perf, des_perf, |
| 679 | max_perf: perf.max_limit_perf, fast_switch, |
| 680 | gov_flags: policy->governor->flags); |
| 681 | |
| 682 | if (!fast_switch) |
| 683 | cpufreq_freq_transition_end(policy, freqs: &freqs, transition_failed: false); |
| 684 | |
| 685 | return 0; |
| 686 | } |
| 687 | |
| 688 | static int amd_pstate_target(struct cpufreq_policy *policy, |
| 689 | unsigned int target_freq, |
| 690 | unsigned int relation) |
| 691 | { |
| 692 | return amd_pstate_update_freq(policy, target_freq, fast_switch: false); |
| 693 | } |
| 694 | |
| 695 | static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, |
| 696 | unsigned int target_freq) |
| 697 | { |
| 698 | if (!amd_pstate_update_freq(policy, target_freq, fast_switch: true)) |
| 699 | return target_freq; |
| 700 | return policy->cur; |
| 701 | } |
| 702 | |
| 703 | static void amd_pstate_adjust_perf(unsigned int cpu, |
| 704 | unsigned long _min_perf, |
| 705 | unsigned long target_perf, |
| 706 | unsigned long capacity) |
| 707 | { |
| 708 | u8 max_perf, min_perf, des_perf, cap_perf; |
| 709 | struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); |
| 710 | struct amd_cpudata *cpudata; |
| 711 | union perf_cached perf; |
| 712 | |
| 713 | if (!policy) |
| 714 | return; |
| 715 | |
| 716 | cpudata = policy->driver_data; |
| 717 | |
| 718 | if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) |
| 719 | amd_pstate_update_min_max_limit(policy); |
| 720 | |
| 721 | perf = READ_ONCE(cpudata->perf); |
| 722 | cap_perf = perf.highest_perf; |
| 723 | |
| 724 | des_perf = cap_perf; |
| 725 | if (target_perf < capacity) |
| 726 | des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); |
| 727 | |
| 728 | if (_min_perf < capacity) |
| 729 | min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); |
| 730 | else |
| 731 | min_perf = cap_perf; |
| 732 | |
| 733 | if (min_perf < perf.min_limit_perf) |
| 734 | min_perf = perf.min_limit_perf; |
| 735 | |
| 736 | max_perf = perf.max_limit_perf; |
| 737 | if (max_perf < min_perf) |
| 738 | max_perf = min_perf; |
| 739 | |
| 740 | amd_pstate_update(cpudata, min_perf, des_perf, max_perf, fast_switch: true, |
| 741 | gov_flags: policy->governor->flags); |
| 742 | } |
| 743 | |
| 744 | static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) |
| 745 | { |
| 746 | struct amd_cpudata *cpudata = policy->driver_data; |
| 747 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 748 | u32 nominal_freq, max_freq; |
| 749 | int ret = 0; |
| 750 | |
| 751 | nominal_freq = READ_ONCE(cpudata->nominal_freq); |
| 752 | max_freq = perf_to_freq(perf, nominal_freq: cpudata->nominal_freq, perf_val: perf.highest_perf); |
| 753 | |
| 754 | if (on) |
| 755 | policy->cpuinfo.max_freq = max_freq; |
| 756 | else if (policy->cpuinfo.max_freq > nominal_freq) |
| 757 | policy->cpuinfo.max_freq = nominal_freq; |
| 758 | |
| 759 | policy->max = policy->cpuinfo.max_freq; |
| 760 | |
| 761 | if (cppc_state == AMD_PSTATE_PASSIVE) { |
| 762 | ret = freq_qos_update_request(req: &cpudata->req[1], new_value: policy->cpuinfo.max_freq); |
| 763 | if (ret < 0) |
| 764 | pr_debug("Failed to update freq constraint: CPU%d\n" , cpudata->cpu); |
| 765 | } |
| 766 | |
| 767 | return ret < 0 ? ret : 0; |
| 768 | } |
| 769 | |
| 770 | static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) |
| 771 | { |
| 772 | struct amd_cpudata *cpudata = policy->driver_data; |
| 773 | int ret; |
| 774 | |
| 775 | if (!cpudata->boost_supported) { |
| 776 | pr_err("Boost mode is not supported by this processor or SBIOS\n" ); |
| 777 | return -EOPNOTSUPP; |
| 778 | } |
| 779 | |
| 780 | ret = amd_pstate_cpu_boost_update(policy, on: state); |
| 781 | refresh_frequency_limits(policy); |
| 782 | |
| 783 | return ret; |
| 784 | } |
| 785 | |
| 786 | static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) |
| 787 | { |
| 788 | u64 boost_val; |
| 789 | int ret = -1; |
| 790 | |
| 791 | /* |
| 792 | * If platform has no CPB support or disable it, initialize current driver |
| 793 | * boost_enabled state to be false, it is not an error for cpufreq core to handle. |
| 794 | */ |
| 795 | if (!cpu_feature_enabled(X86_FEATURE_CPB)) { |
| 796 | pr_debug_once("Boost CPB capabilities not present in the processor\n" ); |
| 797 | ret = 0; |
| 798 | goto exit_err; |
| 799 | } |
| 800 | |
| 801 | ret = rdmsrq_on_cpu(cpu: cpudata->cpu, MSR_K7_HWCR, q: &boost_val); |
| 802 | if (ret) { |
| 803 | pr_err_once("failed to read initial CPU boost state!\n" ); |
| 804 | ret = -EIO; |
| 805 | goto exit_err; |
| 806 | } |
| 807 | |
| 808 | if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) |
| 809 | cpudata->boost_supported = true; |
| 810 | |
| 811 | return 0; |
| 812 | |
| 813 | exit_err: |
| 814 | cpudata->boost_supported = false; |
| 815 | return ret; |
| 816 | } |
| 817 | |
| 818 | static void amd_perf_ctl_reset(unsigned int cpu) |
| 819 | { |
| 820 | wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, q: 0); |
| 821 | } |
| 822 | |
/*
 * Largest value a u8 perf level can hold. amd_pstate_update_limits() only
 * propagates a new ranking to the scheduler when it is below this value.
 */
#define CPPC_MAX_PERF	U8_MAX
| 824 | |
| 825 | static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) |
| 826 | { |
| 827 | /* user disabled or not detected */ |
| 828 | if (!amd_pstate_prefcore) |
| 829 | return; |
| 830 | |
| 831 | /* should use amd-hfi instead */ |
| 832 | if (cpu_feature_enabled(X86_FEATURE_AMD_WORKLOAD_CLASS) && |
| 833 | IS_ENABLED(CONFIG_AMD_HFI)) { |
| 834 | amd_pstate_prefcore = false; |
| 835 | return; |
| 836 | } |
| 837 | |
| 838 | cpudata->hw_prefcore = true; |
| 839 | |
| 840 | /* Priorities must be initialized before ITMT support can be toggled on. */ |
| 841 | sched_set_itmt_core_prio(prio: (int)READ_ONCE(cpudata->prefcore_ranking), core_cpu: cpudata->cpu); |
| 842 | } |
| 843 | |
| 844 | static void amd_pstate_update_limits(struct cpufreq_policy *policy) |
| 845 | { |
| 846 | struct amd_cpudata *cpudata; |
| 847 | u32 prev_high = 0, cur_high = 0; |
| 848 | bool highest_perf_changed = false; |
| 849 | unsigned int cpu = policy->cpu; |
| 850 | |
| 851 | if (!amd_pstate_prefcore) |
| 852 | return; |
| 853 | |
| 854 | if (amd_get_highest_perf(cpu, highest_perf: &cur_high)) |
| 855 | return; |
| 856 | |
| 857 | cpudata = policy->driver_data; |
| 858 | |
| 859 | prev_high = READ_ONCE(cpudata->prefcore_ranking); |
| 860 | highest_perf_changed = (prev_high != cur_high); |
| 861 | if (highest_perf_changed) { |
| 862 | WRITE_ONCE(cpudata->prefcore_ranking, cur_high); |
| 863 | |
| 864 | if (cur_high < CPPC_MAX_PERF) { |
| 865 | sched_set_itmt_core_prio(prio: (int)cur_high, core_cpu: cpu); |
| 866 | sched_update_asym_prefer_cpu(cpu, old_prio: prev_high, new_prio: cur_high); |
| 867 | } |
| 868 | } |
| 869 | } |
| 870 | |
| 871 | /* |
| 872 | * Get pstate transition delay time from ACPI tables that firmware set |
| 873 | * instead of using hardcode value directly. |
| 874 | */ |
| 875 | static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) |
| 876 | { |
| 877 | int transition_delay_ns; |
| 878 | |
| 879 | transition_delay_ns = cppc_get_transition_latency(cpu); |
| 880 | if (transition_delay_ns < 0) { |
| 881 | if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) |
| 882 | return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; |
| 883 | else |
| 884 | return AMD_PSTATE_TRANSITION_DELAY; |
| 885 | } |
| 886 | |
| 887 | return transition_delay_ns / NSEC_PER_USEC; |
| 888 | } |
| 889 | |
| 890 | /* |
| 891 | * Get pstate transition latency value from ACPI tables that firmware |
| 892 | * set instead of using hardcode value directly. |
| 893 | */ |
| 894 | static u32 amd_pstate_get_transition_latency(unsigned int cpu) |
| 895 | { |
| 896 | int transition_latency; |
| 897 | |
| 898 | transition_latency = cppc_get_transition_latency(cpu); |
| 899 | if (transition_latency < 0) |
| 900 | return AMD_PSTATE_TRANSITION_LATENCY; |
| 901 | |
| 902 | return transition_latency; |
| 903 | } |
| 904 | |
| 905 | /* |
| 906 | * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq |
| 907 | * for the @cpudata object. |
| 908 | * |
| 909 | * Requires: all perf members of @cpudata to be initialized. |
| 910 | * |
| 911 | * Returns 0 on success, non-zero value on failure. |
| 912 | */ |
| 913 | static int amd_pstate_init_freq(struct amd_cpudata *cpudata) |
| 914 | { |
| 915 | u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; |
| 916 | struct cppc_perf_caps cppc_perf; |
| 917 | union perf_cached perf; |
| 918 | int ret; |
| 919 | |
| 920 | ret = cppc_get_perf_caps(cpu: cpudata->cpu, caps: &cppc_perf); |
| 921 | if (ret) |
| 922 | return ret; |
| 923 | perf = READ_ONCE(cpudata->perf); |
| 924 | |
| 925 | if (quirks && quirks->nominal_freq) |
| 926 | nominal_freq = quirks->nominal_freq; |
| 927 | else |
| 928 | nominal_freq = cppc_perf.nominal_freq; |
| 929 | nominal_freq *= 1000; |
| 930 | |
| 931 | if (quirks && quirks->lowest_freq) { |
| 932 | min_freq = quirks->lowest_freq; |
| 933 | perf.lowest_perf = freq_to_perf(perf, nominal_freq, freq_val: min_freq); |
| 934 | WRITE_ONCE(cpudata->perf, perf); |
| 935 | } else |
| 936 | min_freq = cppc_perf.lowest_freq; |
| 937 | |
| 938 | min_freq *= 1000; |
| 939 | |
| 940 | WRITE_ONCE(cpudata->nominal_freq, nominal_freq); |
| 941 | |
| 942 | max_freq = perf_to_freq(perf, nominal_freq, perf_val: perf.highest_perf); |
| 943 | lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf_val: perf.lowest_nonlinear_perf); |
| 944 | WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); |
| 945 | |
| 946 | /** |
| 947 | * Below values need to be initialized correctly, otherwise driver will fail to load |
| 948 | * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf |
| 949 | * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] |
| 950 | * Check _CPC in ACPI table objects if any values are incorrect |
| 951 | */ |
| 952 | if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { |
| 953 | pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n" , |
| 954 | min_freq, max_freq, nominal_freq); |
| 955 | return -EINVAL; |
| 956 | } |
| 957 | |
| 958 | if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { |
| 959 | pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n" , |
| 960 | lowest_nonlinear_freq, min_freq, nominal_freq); |
| 961 | return -EINVAL; |
| 962 | } |
| 963 | |
| 964 | return 0; |
| 965 | } |
| 966 | |
| 967 | static int amd_pstate_cpu_init(struct cpufreq_policy *policy) |
| 968 | { |
| 969 | struct amd_cpudata *cpudata; |
| 970 | union perf_cached perf; |
| 971 | struct device *dev; |
| 972 | int ret; |
| 973 | |
| 974 | /* |
| 975 | * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, |
| 976 | * which is ideal for initialization process. |
| 977 | */ |
| 978 | amd_perf_ctl_reset(cpu: policy->cpu); |
| 979 | dev = get_cpu_device(cpu: policy->cpu); |
| 980 | if (!dev) |
| 981 | return -ENODEV; |
| 982 | |
| 983 | cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); |
| 984 | if (!cpudata) |
| 985 | return -ENOMEM; |
| 986 | |
| 987 | cpudata->cpu = policy->cpu; |
| 988 | |
| 989 | ret = amd_pstate_init_perf(cpudata); |
| 990 | if (ret) |
| 991 | goto free_cpudata1; |
| 992 | |
| 993 | amd_pstate_init_prefcore(cpudata); |
| 994 | |
| 995 | ret = amd_pstate_init_freq(cpudata); |
| 996 | if (ret) |
| 997 | goto free_cpudata1; |
| 998 | |
| 999 | ret = amd_pstate_init_boost_support(cpudata); |
| 1000 | if (ret) |
| 1001 | goto free_cpudata1; |
| 1002 | |
| 1003 | policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(cpu: policy->cpu); |
| 1004 | policy->transition_delay_us = amd_pstate_get_transition_delay_us(cpu: policy->cpu); |
| 1005 | |
| 1006 | perf = READ_ONCE(cpudata->perf); |
| 1007 | |
| 1008 | policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, |
| 1009 | nominal_freq: cpudata->nominal_freq, |
| 1010 | perf_val: perf.lowest_perf); |
| 1011 | policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, |
| 1012 | nominal_freq: cpudata->nominal_freq, |
| 1013 | perf_val: perf.highest_perf); |
| 1014 | |
| 1015 | ret = amd_pstate_cppc_enable(policy); |
| 1016 | if (ret) |
| 1017 | goto free_cpudata1; |
| 1018 | |
| 1019 | policy->boost_supported = READ_ONCE(cpudata->boost_supported); |
| 1020 | |
| 1021 | /* It will be updated by governor */ |
| 1022 | policy->cur = policy->cpuinfo.min_freq; |
| 1023 | |
| 1024 | if (cpu_feature_enabled(X86_FEATURE_CPPC)) |
| 1025 | policy->fast_switch_possible = true; |
| 1026 | |
| 1027 | ret = freq_qos_add_request(qos: &policy->constraints, req: &cpudata->req[0], |
| 1028 | type: FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); |
| 1029 | if (ret < 0) { |
| 1030 | dev_err(dev, "Failed to add min-freq constraint (%d)\n" , ret); |
| 1031 | goto free_cpudata1; |
| 1032 | } |
| 1033 | |
| 1034 | ret = freq_qos_add_request(qos: &policy->constraints, req: &cpudata->req[1], |
| 1035 | type: FREQ_QOS_MAX, value: policy->cpuinfo.max_freq); |
| 1036 | if (ret < 0) { |
| 1037 | dev_err(dev, "Failed to add max-freq constraint (%d)\n" , ret); |
| 1038 | goto free_cpudata2; |
| 1039 | } |
| 1040 | |
| 1041 | policy->driver_data = cpudata; |
| 1042 | |
| 1043 | if (!current_pstate_driver->adjust_perf) |
| 1044 | current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; |
| 1045 | |
| 1046 | return 0; |
| 1047 | |
| 1048 | free_cpudata2: |
| 1049 | freq_qos_remove_request(req: &cpudata->req[0]); |
| 1050 | free_cpudata1: |
| 1051 | pr_warn("Failed to initialize CPU %d: %d\n" , policy->cpu, ret); |
| 1052 | kfree(objp: cpudata); |
| 1053 | return ret; |
| 1054 | } |
| 1055 | |
| 1056 | static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) |
| 1057 | { |
| 1058 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1059 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 1060 | |
| 1061 | /* Reset CPPC_REQ MSR to the BIOS value */ |
| 1062 | amd_pstate_update_perf(policy, min_perf: perf.bios_min_perf, des_perf: 0U, max_perf: 0U, epp: 0U, fast_switch: false); |
| 1063 | |
| 1064 | freq_qos_remove_request(req: &cpudata->req[1]); |
| 1065 | freq_qos_remove_request(req: &cpudata->req[0]); |
| 1066 | policy->fast_switch_possible = false; |
| 1067 | kfree(objp: cpudata); |
| 1068 | } |
| 1069 | |
| 1070 | /* Sysfs attributes */ |
| 1071 | |
| 1072 | /* |
| 1073 | * This frequency is to indicate the maximum hardware frequency. |
| 1074 | * If boost is not active but supported, the frequency will be larger than the |
| 1075 | * one in cpuinfo. |
| 1076 | */ |
| 1077 | static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, |
| 1078 | char *buf) |
| 1079 | { |
| 1080 | struct amd_cpudata *cpudata; |
| 1081 | union perf_cached perf; |
| 1082 | |
| 1083 | cpudata = policy->driver_data; |
| 1084 | perf = READ_ONCE(cpudata->perf); |
| 1085 | |
| 1086 | return sysfs_emit(buf, fmt: "%u\n" , |
| 1087 | perf_to_freq(perf, nominal_freq: cpudata->nominal_freq, perf_val: perf.highest_perf)); |
| 1088 | } |
| 1089 | |
| 1090 | static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, |
| 1091 | char *buf) |
| 1092 | { |
| 1093 | struct amd_cpudata *cpudata; |
| 1094 | union perf_cached perf; |
| 1095 | |
| 1096 | cpudata = policy->driver_data; |
| 1097 | perf = READ_ONCE(cpudata->perf); |
| 1098 | |
| 1099 | return sysfs_emit(buf, fmt: "%u\n" , |
| 1100 | perf_to_freq(perf, nominal_freq: cpudata->nominal_freq, perf_val: perf.lowest_nonlinear_perf)); |
| 1101 | } |
| 1102 | |
| 1103 | /* |
| 1104 | * In some of ASICs, the highest_perf is not the one in the _CPC table, so we |
| 1105 | * need to expose it to sysfs. |
| 1106 | */ |
| 1107 | static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, |
| 1108 | char *buf) |
| 1109 | { |
| 1110 | struct amd_cpudata *cpudata; |
| 1111 | |
| 1112 | cpudata = policy->driver_data; |
| 1113 | |
| 1114 | return sysfs_emit(buf, fmt: "%u\n" , cpudata->perf.highest_perf); |
| 1115 | } |
| 1116 | |
| 1117 | static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, |
| 1118 | char *buf) |
| 1119 | { |
| 1120 | u8 perf; |
| 1121 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1122 | |
| 1123 | perf = READ_ONCE(cpudata->prefcore_ranking); |
| 1124 | |
| 1125 | return sysfs_emit(buf, fmt: "%u\n" , perf); |
| 1126 | } |
| 1127 | |
| 1128 | static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, |
| 1129 | char *buf) |
| 1130 | { |
| 1131 | bool hw_prefcore; |
| 1132 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1133 | |
| 1134 | hw_prefcore = READ_ONCE(cpudata->hw_prefcore); |
| 1135 | |
| 1136 | return sysfs_emit(buf, fmt: "%s\n" , str_enabled_disabled(v: hw_prefcore)); |
| 1137 | } |
| 1138 | |
| 1139 | static ssize_t show_energy_performance_available_preferences( |
| 1140 | struct cpufreq_policy *policy, char *buf) |
| 1141 | { |
| 1142 | int offset = 0, i; |
| 1143 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1144 | |
| 1145 | if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) |
| 1146 | return sysfs_emit_at(buf, at: offset, fmt: "%s\n" , |
| 1147 | energy_perf_strings[EPP_INDEX_PERFORMANCE]); |
| 1148 | |
| 1149 | for (i = 0; i < ARRAY_SIZE(energy_perf_strings); i++) |
| 1150 | offset += sysfs_emit_at(buf, at: offset, fmt: "%s " , energy_perf_strings[i]); |
| 1151 | |
| 1152 | offset += sysfs_emit_at(buf, at: offset, fmt: "\n" ); |
| 1153 | |
| 1154 | return offset; |
| 1155 | } |
| 1156 | |
| 1157 | static ssize_t store_energy_performance_preference( |
| 1158 | struct cpufreq_policy *policy, const char *buf, size_t count) |
| 1159 | { |
| 1160 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1161 | ssize_t ret; |
| 1162 | u8 epp; |
| 1163 | |
| 1164 | ret = sysfs_match_string(energy_perf_strings, buf); |
| 1165 | if (ret < 0) |
| 1166 | return -EINVAL; |
| 1167 | |
| 1168 | if (!ret) |
| 1169 | epp = cpudata->epp_default; |
| 1170 | else |
| 1171 | epp = epp_values[ret]; |
| 1172 | |
| 1173 | if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { |
| 1174 | pr_debug("EPP cannot be set under performance policy\n" ); |
| 1175 | return -EBUSY; |
| 1176 | } |
| 1177 | |
| 1178 | ret = amd_pstate_set_epp(policy, epp); |
| 1179 | |
| 1180 | return ret ? ret : count; |
| 1181 | } |
| 1182 | |
| 1183 | static ssize_t show_energy_performance_preference( |
| 1184 | struct cpufreq_policy *policy, char *buf) |
| 1185 | { |
| 1186 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1187 | u8 preference, epp; |
| 1188 | |
| 1189 | epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); |
| 1190 | |
| 1191 | switch (epp) { |
| 1192 | case AMD_CPPC_EPP_PERFORMANCE: |
| 1193 | preference = EPP_INDEX_PERFORMANCE; |
| 1194 | break; |
| 1195 | case AMD_CPPC_EPP_BALANCE_PERFORMANCE: |
| 1196 | preference = EPP_INDEX_BALANCE_PERFORMANCE; |
| 1197 | break; |
| 1198 | case AMD_CPPC_EPP_BALANCE_POWERSAVE: |
| 1199 | preference = EPP_INDEX_BALANCE_POWERSAVE; |
| 1200 | break; |
| 1201 | case AMD_CPPC_EPP_POWERSAVE: |
| 1202 | preference = EPP_INDEX_POWERSAVE; |
| 1203 | break; |
| 1204 | default: |
| 1205 | return -EINVAL; |
| 1206 | } |
| 1207 | |
| 1208 | return sysfs_emit(buf, fmt: "%s\n" , energy_perf_strings[preference]); |
| 1209 | } |
| 1210 | |
| 1211 | static void amd_pstate_driver_cleanup(void) |
| 1212 | { |
| 1213 | if (amd_pstate_prefcore) |
| 1214 | sched_clear_itmt_support(); |
| 1215 | |
| 1216 | cppc_state = AMD_PSTATE_DISABLE; |
| 1217 | current_pstate_driver = NULL; |
| 1218 | } |
| 1219 | |
| 1220 | static int amd_pstate_set_driver(int mode_idx) |
| 1221 | { |
| 1222 | if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { |
| 1223 | cppc_state = mode_idx; |
| 1224 | if (cppc_state == AMD_PSTATE_DISABLE) |
| 1225 | pr_info("driver is explicitly disabled\n" ); |
| 1226 | |
| 1227 | if (cppc_state == AMD_PSTATE_ACTIVE) |
| 1228 | current_pstate_driver = &amd_pstate_epp_driver; |
| 1229 | |
| 1230 | if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) |
| 1231 | current_pstate_driver = &amd_pstate_driver; |
| 1232 | |
| 1233 | return 0; |
| 1234 | } |
| 1235 | |
| 1236 | return -EINVAL; |
| 1237 | } |
| 1238 | |
| 1239 | static int amd_pstate_register_driver(int mode) |
| 1240 | { |
| 1241 | int ret; |
| 1242 | |
| 1243 | ret = amd_pstate_set_driver(mode_idx: mode); |
| 1244 | if (ret) |
| 1245 | return ret; |
| 1246 | |
| 1247 | cppc_state = mode; |
| 1248 | |
| 1249 | /* at least one CPU supports CPB */ |
| 1250 | current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); |
| 1251 | |
| 1252 | ret = cpufreq_register_driver(driver_data: current_pstate_driver); |
| 1253 | if (ret) { |
| 1254 | amd_pstate_driver_cleanup(); |
| 1255 | return ret; |
| 1256 | } |
| 1257 | |
| 1258 | /* Enable ITMT support once all CPUs have initialized their asym priorities. */ |
| 1259 | if (amd_pstate_prefcore) |
| 1260 | sched_set_itmt_support(); |
| 1261 | |
| 1262 | return 0; |
| 1263 | } |
| 1264 | |
| 1265 | static int amd_pstate_unregister_driver(int dummy) |
| 1266 | { |
| 1267 | cpufreq_unregister_driver(driver_data: current_pstate_driver); |
| 1268 | amd_pstate_driver_cleanup(); |
| 1269 | return 0; |
| 1270 | } |
| 1271 | |
| 1272 | static int amd_pstate_change_mode_without_dvr_change(int mode) |
| 1273 | { |
| 1274 | int cpu = 0; |
| 1275 | |
| 1276 | cppc_state = mode; |
| 1277 | |
| 1278 | if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) |
| 1279 | return 0; |
| 1280 | |
| 1281 | for_each_online_cpu(cpu) { |
| 1282 | cppc_set_auto_sel(cpu, enable: (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); |
| 1283 | } |
| 1284 | |
| 1285 | return 0; |
| 1286 | } |
| 1287 | |
/*
 * Switch to @mode by unregistering the current cpufreq driver and then
 * registering the one that belongs to @mode.
 *
 * Returns 0 on success or the first error encountered.
 */
static int amd_pstate_change_driver_mode(int mode)
{
	int err = amd_pstate_unregister_driver(0);

	if (err)
		return err;

	return amd_pstate_register_driver(mode);
}
| 1302 | |
| 1303 | static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { |
| 1304 | [AMD_PSTATE_DISABLE] = { |
| 1305 | [AMD_PSTATE_DISABLE] = NULL, |
| 1306 | [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, |
| 1307 | [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver, |
| 1308 | [AMD_PSTATE_GUIDED] = amd_pstate_register_driver, |
| 1309 | }, |
| 1310 | [AMD_PSTATE_PASSIVE] = { |
| 1311 | [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, |
| 1312 | [AMD_PSTATE_PASSIVE] = NULL, |
| 1313 | [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, |
| 1314 | [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change, |
| 1315 | }, |
| 1316 | [AMD_PSTATE_ACTIVE] = { |
| 1317 | [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, |
| 1318 | [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode, |
| 1319 | [AMD_PSTATE_ACTIVE] = NULL, |
| 1320 | [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode, |
| 1321 | }, |
| 1322 | [AMD_PSTATE_GUIDED] = { |
| 1323 | [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, |
| 1324 | [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change, |
| 1325 | [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, |
| 1326 | [AMD_PSTATE_GUIDED] = NULL, |
| 1327 | }, |
| 1328 | }; |
| 1329 | |
| 1330 | static ssize_t amd_pstate_show_status(char *buf) |
| 1331 | { |
| 1332 | if (!current_pstate_driver) |
| 1333 | return sysfs_emit(buf, fmt: "disable\n" ); |
| 1334 | |
| 1335 | return sysfs_emit(buf, fmt: "%s\n" , amd_pstate_mode_string[cppc_state]); |
| 1336 | } |
| 1337 | |
| 1338 | int amd_pstate_get_status(void) |
| 1339 | { |
| 1340 | return cppc_state; |
| 1341 | } |
| 1342 | EXPORT_SYMBOL_GPL(amd_pstate_get_status); |
| 1343 | |
| 1344 | int amd_pstate_update_status(const char *buf, size_t size) |
| 1345 | { |
| 1346 | int mode_idx; |
| 1347 | |
| 1348 | if (size > strlen("passive" ) || size < strlen("active" )) |
| 1349 | return -EINVAL; |
| 1350 | |
| 1351 | mode_idx = get_mode_idx_from_str(str: buf, size); |
| 1352 | if (mode_idx < 0) |
| 1353 | return mode_idx; |
| 1354 | |
| 1355 | if (mode_state_machine[cppc_state][mode_idx]) { |
| 1356 | guard(mutex)(T: &amd_pstate_driver_lock); |
| 1357 | return mode_state_machine[cppc_state][mode_idx](mode_idx); |
| 1358 | } |
| 1359 | |
| 1360 | return 0; |
| 1361 | } |
| 1362 | EXPORT_SYMBOL_GPL(amd_pstate_update_status); |
| 1363 | |
| 1364 | static ssize_t status_show(struct device *dev, |
| 1365 | struct device_attribute *attr, char *buf) |
| 1366 | { |
| 1367 | |
| 1368 | guard(mutex)(T: &amd_pstate_driver_lock); |
| 1369 | |
| 1370 | return amd_pstate_show_status(buf); |
| 1371 | } |
| 1372 | |
| 1373 | static ssize_t status_store(struct device *a, struct device_attribute *b, |
| 1374 | const char *buf, size_t count) |
| 1375 | { |
| 1376 | char *p = memchr(p: buf, c: '\n', size: count); |
| 1377 | int ret; |
| 1378 | |
| 1379 | ret = amd_pstate_update_status(buf, p ? p - buf : count); |
| 1380 | |
| 1381 | return ret < 0 ? ret : count; |
| 1382 | } |
| 1383 | |
| 1384 | static ssize_t prefcore_show(struct device *dev, |
| 1385 | struct device_attribute *attr, char *buf) |
| 1386 | { |
| 1387 | return sysfs_emit(buf, fmt: "%s\n" , str_enabled_disabled(v: amd_pstate_prefcore)); |
| 1388 | } |
| 1389 | |
/* Per-policy frequency attributes */
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

/* Per-policy perf and preferred-core attributes */
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);

/* Per-policy EPP attributes, listed only in amd_pstate_epp_attr */
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);

/* Driver-wide device attributes, collected in pstate_global_attributes */
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);
| 1400 | |
| 1401 | static struct freq_attr *amd_pstate_attr[] = { |
| 1402 | &amd_pstate_max_freq, |
| 1403 | &amd_pstate_lowest_nonlinear_freq, |
| 1404 | &amd_pstate_highest_perf, |
| 1405 | &amd_pstate_prefcore_ranking, |
| 1406 | &amd_pstate_hw_prefcore, |
| 1407 | NULL, |
| 1408 | }; |
| 1409 | |
| 1410 | static struct freq_attr *amd_pstate_epp_attr[] = { |
| 1411 | &amd_pstate_max_freq, |
| 1412 | &amd_pstate_lowest_nonlinear_freq, |
| 1413 | &amd_pstate_highest_perf, |
| 1414 | &amd_pstate_prefcore_ranking, |
| 1415 | &amd_pstate_hw_prefcore, |
| 1416 | &energy_performance_preference, |
| 1417 | &energy_performance_available_preferences, |
| 1418 | NULL, |
| 1419 | }; |
| 1420 | |
| 1421 | static struct attribute *pstate_global_attributes[] = { |
| 1422 | &dev_attr_status.attr, |
| 1423 | &dev_attr_prefcore.attr, |
| 1424 | NULL |
| 1425 | }; |
| 1426 | |
| 1427 | static const struct attribute_group amd_pstate_global_attr_group = { |
| 1428 | .name = "amd_pstate" , |
| 1429 | .attrs = pstate_global_attributes, |
| 1430 | }; |
| 1431 | |
| 1432 | static bool amd_pstate_acpi_pm_profile_server(void) |
| 1433 | { |
| 1434 | switch (acpi_gbl_FADT.preferred_profile) { |
| 1435 | case PM_ENTERPRISE_SERVER: |
| 1436 | case PM_SOHO_SERVER: |
| 1437 | case PM_PERFORMANCE_SERVER: |
| 1438 | return true; |
| 1439 | } |
| 1440 | return false; |
| 1441 | } |
| 1442 | |
| 1443 | static bool amd_pstate_acpi_pm_profile_undefined(void) |
| 1444 | { |
| 1445 | if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED) |
| 1446 | return true; |
| 1447 | if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES) |
| 1448 | return true; |
| 1449 | return false; |
| 1450 | } |
| 1451 | |
| 1452 | static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) |
| 1453 | { |
| 1454 | struct amd_cpudata *cpudata; |
| 1455 | union perf_cached perf; |
| 1456 | struct device *dev; |
| 1457 | int ret; |
| 1458 | |
| 1459 | /* |
| 1460 | * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, |
| 1461 | * which is ideal for initialization process. |
| 1462 | */ |
| 1463 | amd_perf_ctl_reset(cpu: policy->cpu); |
| 1464 | dev = get_cpu_device(cpu: policy->cpu); |
| 1465 | if (!dev) |
| 1466 | return -ENODEV; |
| 1467 | |
| 1468 | cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); |
| 1469 | if (!cpudata) |
| 1470 | return -ENOMEM; |
| 1471 | |
| 1472 | cpudata->cpu = policy->cpu; |
| 1473 | |
| 1474 | ret = amd_pstate_init_perf(cpudata); |
| 1475 | if (ret) |
| 1476 | goto free_cpudata1; |
| 1477 | |
| 1478 | amd_pstate_init_prefcore(cpudata); |
| 1479 | |
| 1480 | ret = amd_pstate_init_freq(cpudata); |
| 1481 | if (ret) |
| 1482 | goto free_cpudata1; |
| 1483 | |
| 1484 | ret = amd_pstate_init_boost_support(cpudata); |
| 1485 | if (ret) |
| 1486 | goto free_cpudata1; |
| 1487 | |
| 1488 | perf = READ_ONCE(cpudata->perf); |
| 1489 | |
| 1490 | policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, |
| 1491 | nominal_freq: cpudata->nominal_freq, |
| 1492 | perf_val: perf.lowest_perf); |
| 1493 | policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, |
| 1494 | nominal_freq: cpudata->nominal_freq, |
| 1495 | perf_val: perf.highest_perf); |
| 1496 | policy->driver_data = cpudata; |
| 1497 | |
| 1498 | ret = amd_pstate_cppc_enable(policy); |
| 1499 | if (ret) |
| 1500 | goto free_cpudata1; |
| 1501 | |
| 1502 | /* It will be updated by governor */ |
| 1503 | policy->cur = policy->cpuinfo.min_freq; |
| 1504 | |
| 1505 | |
| 1506 | policy->boost_supported = READ_ONCE(cpudata->boost_supported); |
| 1507 | |
| 1508 | /* |
| 1509 | * Set the policy to provide a valid fallback value in case |
| 1510 | * the default cpufreq governor is neither powersave nor performance. |
| 1511 | */ |
| 1512 | if (amd_pstate_acpi_pm_profile_server() || |
| 1513 | amd_pstate_acpi_pm_profile_undefined()) { |
| 1514 | policy->policy = CPUFREQ_POLICY_PERFORMANCE; |
| 1515 | cpudata->epp_default = amd_pstate_get_epp(cpudata); |
| 1516 | } else { |
| 1517 | policy->policy = CPUFREQ_POLICY_POWERSAVE; |
| 1518 | cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; |
| 1519 | } |
| 1520 | |
| 1521 | ret = amd_pstate_set_epp(policy, epp: cpudata->epp_default); |
| 1522 | if (ret) |
| 1523 | return ret; |
| 1524 | |
| 1525 | current_pstate_driver->adjust_perf = NULL; |
| 1526 | |
| 1527 | return 0; |
| 1528 | |
| 1529 | free_cpudata1: |
| 1530 | pr_warn("Failed to initialize CPU %d: %d\n" , policy->cpu, ret); |
| 1531 | kfree(objp: cpudata); |
| 1532 | return ret; |
| 1533 | } |
| 1534 | |
| 1535 | static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) |
| 1536 | { |
| 1537 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1538 | |
| 1539 | if (cpudata) { |
| 1540 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 1541 | |
| 1542 | /* Reset CPPC_REQ MSR to the BIOS value */ |
| 1543 | amd_pstate_update_perf(policy, min_perf: perf.bios_min_perf, des_perf: 0U, max_perf: 0U, epp: 0U, fast_switch: false); |
| 1544 | |
| 1545 | kfree(objp: cpudata); |
| 1546 | policy->driver_data = NULL; |
| 1547 | } |
| 1548 | |
| 1549 | pr_debug("CPU %d exiting\n" , policy->cpu); |
| 1550 | } |
| 1551 | |
| 1552 | static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy, bool policy_change) |
| 1553 | { |
| 1554 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1555 | union perf_cached perf; |
| 1556 | u8 epp; |
| 1557 | |
| 1558 | if (policy_change || |
| 1559 | policy->min != cpudata->min_limit_freq || |
| 1560 | policy->max != cpudata->max_limit_freq) |
| 1561 | amd_pstate_update_min_max_limit(policy); |
| 1562 | |
| 1563 | if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) |
| 1564 | epp = 0; |
| 1565 | else |
| 1566 | epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); |
| 1567 | |
| 1568 | perf = READ_ONCE(cpudata->perf); |
| 1569 | |
| 1570 | return amd_pstate_update_perf(policy, min_perf: perf.min_limit_perf, des_perf: 0U, |
| 1571 | max_perf: perf.max_limit_perf, epp, fast_switch: false); |
| 1572 | } |
| 1573 | |
| 1574 | static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) |
| 1575 | { |
| 1576 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1577 | int ret; |
| 1578 | |
| 1579 | if (!policy->cpuinfo.max_freq) |
| 1580 | return -ENODEV; |
| 1581 | |
| 1582 | cpudata->policy = policy->policy; |
| 1583 | |
| 1584 | ret = amd_pstate_epp_update_limit(policy, policy_change: true); |
| 1585 | if (ret) |
| 1586 | return ret; |
| 1587 | |
| 1588 | /* |
| 1589 | * policy->cur is never updated with the amd_pstate_epp driver, but it |
| 1590 | * is used as a stale frequency value. So, keep it within limits. |
| 1591 | */ |
| 1592 | policy->cur = policy->min; |
| 1593 | |
| 1594 | return 0; |
| 1595 | } |
| 1596 | |
/* ->online() callback shared by both drivers: enable CPPC for the policy. */
static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
{
	int err = amd_pstate_cppc_enable(policy);

	return err;
}
| 1601 | |
| 1602 | static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) |
| 1603 | { |
| 1604 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1605 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 1606 | |
| 1607 | /* |
| 1608 | * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified |
| 1609 | * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the |
| 1610 | * limits, epp and desired perf will get reset to the cached values in cpudata struct |
| 1611 | */ |
| 1612 | return amd_pstate_update_perf(policy, min_perf: perf.bios_min_perf, |
| 1613 | FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), |
| 1614 | FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), |
| 1615 | FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), |
| 1616 | fast_switch: false); |
| 1617 | } |
| 1618 | |
| 1619 | static int amd_pstate_suspend(struct cpufreq_policy *policy) |
| 1620 | { |
| 1621 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1622 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 1623 | int ret; |
| 1624 | |
| 1625 | /* |
| 1626 | * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified |
| 1627 | * min_perf value across kexec reboots. If this CPU is just resumed back without kexec, |
| 1628 | * the limits, epp and desired perf will get reset to the cached values in cpudata struct |
| 1629 | */ |
| 1630 | ret = amd_pstate_update_perf(policy, min_perf: perf.bios_min_perf, |
| 1631 | FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), |
| 1632 | FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), |
| 1633 | FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), |
| 1634 | fast_switch: false); |
| 1635 | if (ret) |
| 1636 | return ret; |
| 1637 | |
| 1638 | /* set this flag to avoid setting core offline*/ |
| 1639 | cpudata->suspended = true; |
| 1640 | |
| 1641 | return 0; |
| 1642 | } |
| 1643 | |
| 1644 | static int amd_pstate_resume(struct cpufreq_policy *policy) |
| 1645 | { |
| 1646 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1647 | union perf_cached perf = READ_ONCE(cpudata->perf); |
| 1648 | int cur_perf = freq_to_perf(perf, nominal_freq: cpudata->nominal_freq, freq_val: policy->cur); |
| 1649 | |
| 1650 | /* Set CPPC_REQ to last sane value until the governor updates it */ |
| 1651 | return amd_pstate_update_perf(policy, min_perf: perf.min_limit_perf, des_perf: cur_perf, max_perf: perf.max_limit_perf, |
| 1652 | epp: 0U, fast_switch: false); |
| 1653 | } |
| 1654 | |
| 1655 | static int amd_pstate_epp_resume(struct cpufreq_policy *policy) |
| 1656 | { |
| 1657 | struct amd_cpudata *cpudata = policy->driver_data; |
| 1658 | |
| 1659 | if (cpudata->suspended) { |
| 1660 | int ret; |
| 1661 | |
| 1662 | /* enable amd pstate from suspend state*/ |
| 1663 | ret = amd_pstate_epp_update_limit(policy, policy_change: false); |
| 1664 | if (ret) |
| 1665 | return ret; |
| 1666 | |
| 1667 | cpudata->suspended = false; |
| 1668 | } |
| 1669 | |
| 1670 | return 0; |
| 1671 | } |
| 1672 | |
| 1673 | static struct cpufreq_driver amd_pstate_driver = { |
| 1674 | .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, |
| 1675 | .verify = amd_pstate_verify, |
| 1676 | .target = amd_pstate_target, |
| 1677 | .fast_switch = amd_pstate_fast_switch, |
| 1678 | .init = amd_pstate_cpu_init, |
| 1679 | .exit = amd_pstate_cpu_exit, |
| 1680 | .online = amd_pstate_cpu_online, |
| 1681 | .offline = amd_pstate_cpu_offline, |
| 1682 | .suspend = amd_pstate_suspend, |
| 1683 | .resume = amd_pstate_resume, |
| 1684 | .set_boost = amd_pstate_set_boost, |
| 1685 | .update_limits = amd_pstate_update_limits, |
| 1686 | .name = "amd-pstate" , |
| 1687 | .attr = amd_pstate_attr, |
| 1688 | }; |
| 1689 | |
| 1690 | static struct cpufreq_driver amd_pstate_epp_driver = { |
| 1691 | .flags = CPUFREQ_CONST_LOOPS, |
| 1692 | .verify = amd_pstate_verify, |
| 1693 | .setpolicy = amd_pstate_epp_set_policy, |
| 1694 | .init = amd_pstate_epp_cpu_init, |
| 1695 | .exit = amd_pstate_epp_cpu_exit, |
| 1696 | .offline = amd_pstate_cpu_offline, |
| 1697 | .online = amd_pstate_cpu_online, |
| 1698 | .suspend = amd_pstate_suspend, |
| 1699 | .resume = amd_pstate_epp_resume, |
| 1700 | .update_limits = amd_pstate_update_limits, |
| 1701 | .set_boost = amd_pstate_set_boost, |
| 1702 | .name = "amd-pstate-epp" , |
| 1703 | .attr = amd_pstate_epp_attr, |
| 1704 | }; |
| 1705 | |
| 1706 | /* |
| 1707 | * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. |
| 1708 | * show the debug message that helps to check if the CPU has CPPC support for loading issue. |
| 1709 | */ |
| 1710 | static bool amd_cppc_supported(void) |
| 1711 | { |
| 1712 | struct cpuinfo_x86 *c = &cpu_data(0); |
| 1713 | bool warn = false; |
| 1714 | |
| 1715 | if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { |
| 1716 | pr_debug_once("CPPC feature is not supported by the processor\n" ); |
| 1717 | return false; |
| 1718 | } |
| 1719 | |
| 1720 | /* |
| 1721 | * If the CPPC feature is disabled in the BIOS for processors |
| 1722 | * that support MSR-based CPPC, the AMD Pstate driver may not |
| 1723 | * function correctly. |
| 1724 | * |
| 1725 | * For such processors, check the CPPC flag and display a |
| 1726 | * warning message if the platform supports CPPC. |
| 1727 | * |
| 1728 | * Note: The code check below will not abort the driver |
| 1729 | * registration process because of the code is added for |
| 1730 | * debugging purposes. Besides, it may still be possible for |
| 1731 | * the driver to work using the shared-memory mechanism. |
| 1732 | */ |
| 1733 | if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { |
| 1734 | if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { |
| 1735 | switch (c->x86_model) { |
| 1736 | case 0x60 ... 0x6F: |
| 1737 | case 0x80 ... 0xAF: |
| 1738 | warn = true; |
| 1739 | break; |
| 1740 | } |
| 1741 | } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || |
| 1742 | cpu_feature_enabled(X86_FEATURE_ZEN4)) { |
| 1743 | switch (c->x86_model) { |
| 1744 | case 0x10 ... 0x1F: |
| 1745 | case 0x40 ... 0xAF: |
| 1746 | warn = true; |
| 1747 | break; |
| 1748 | } |
| 1749 | } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { |
| 1750 | warn = true; |
| 1751 | } |
| 1752 | } |
| 1753 | |
| 1754 | if (warn) |
| 1755 | pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" |
| 1756 | "Please enable it if your BIOS has the CPPC option.\n" ); |
| 1757 | return true; |
| 1758 | } |
| 1759 | |
| 1760 | static int __init amd_pstate_init(void) |
| 1761 | { |
| 1762 | struct device *dev_root; |
| 1763 | int ret; |
| 1764 | |
| 1765 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) |
| 1766 | return -ENODEV; |
| 1767 | |
| 1768 | /* show debug message only if CPPC is not supported */ |
| 1769 | if (!amd_cppc_supported()) |
| 1770 | return -EOPNOTSUPP; |
| 1771 | |
| 1772 | /* show warning message when BIOS broken or ACPI disabled */ |
| 1773 | if (!acpi_cpc_valid()) { |
| 1774 | pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n" ); |
| 1775 | return -ENODEV; |
| 1776 | } |
| 1777 | |
| 1778 | /* don't keep reloading if cpufreq_driver exists */ |
| 1779 | if (cpufreq_get_current_driver()) |
| 1780 | return -EEXIST; |
| 1781 | |
| 1782 | quirks = NULL; |
| 1783 | |
| 1784 | /* check if this machine need CPPC quirks */ |
| 1785 | dmi_check_system(list: amd_pstate_quirks_table); |
| 1786 | |
| 1787 | /* |
| 1788 | * determine the driver mode from the command line or kernel config. |
| 1789 | * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. |
| 1790 | * command line options will override the kernel config settings. |
| 1791 | */ |
| 1792 | |
| 1793 | if (cppc_state == AMD_PSTATE_UNDEFINED) { |
| 1794 | /* Disable on the following configs by default: |
| 1795 | * 1. Undefined platforms |
| 1796 | * 2. Server platforms with CPUs older than Family 0x1A. |
| 1797 | */ |
| 1798 | if (amd_pstate_acpi_pm_profile_undefined() || |
| 1799 | (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) { |
| 1800 | pr_info("driver load is disabled, boot with specific mode to enable this\n" ); |
| 1801 | return -ENODEV; |
| 1802 | } |
| 1803 | /* get driver mode from kernel config option [1:4] */ |
| 1804 | cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; |
| 1805 | } |
| 1806 | |
| 1807 | if (cppc_state == AMD_PSTATE_DISABLE) { |
| 1808 | pr_info("driver load is disabled, boot with specific mode to enable this\n" ); |
| 1809 | return -ENODEV; |
| 1810 | } |
| 1811 | |
| 1812 | /* capability check */ |
| 1813 | if (cpu_feature_enabled(X86_FEATURE_CPPC)) { |
| 1814 | pr_debug("AMD CPPC MSR based functionality is supported\n" ); |
| 1815 | } else { |
| 1816 | pr_debug("AMD CPPC shared memory based functionality is supported\n" ); |
| 1817 | static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); |
| 1818 | static_call_update(amd_pstate_init_perf, shmem_init_perf); |
| 1819 | static_call_update(amd_pstate_update_perf, shmem_update_perf); |
| 1820 | static_call_update(amd_pstate_get_epp, shmem_get_epp); |
| 1821 | static_call_update(amd_pstate_set_epp, shmem_set_epp); |
| 1822 | } |
| 1823 | |
| 1824 | if (amd_pstate_prefcore) { |
| 1825 | ret = amd_detect_prefcore(detected: &amd_pstate_prefcore); |
| 1826 | if (ret) |
| 1827 | return ret; |
| 1828 | } |
| 1829 | |
| 1830 | ret = amd_pstate_register_driver(mode: cppc_state); |
| 1831 | if (ret) { |
| 1832 | pr_err("failed to register with return %d\n" , ret); |
| 1833 | return ret; |
| 1834 | } |
| 1835 | |
| 1836 | dev_root = bus_get_dev_root(bus: &cpu_subsys); |
| 1837 | if (dev_root) { |
| 1838 | ret = sysfs_create_group(kobj: &dev_root->kobj, grp: &amd_pstate_global_attr_group); |
| 1839 | put_device(dev: dev_root); |
| 1840 | if (ret) { |
| 1841 | pr_err("sysfs attribute export failed with error %d.\n" , ret); |
| 1842 | goto global_attr_free; |
| 1843 | } |
| 1844 | } |
| 1845 | |
| 1846 | return ret; |
| 1847 | |
| 1848 | global_attr_free: |
| 1849 | cpufreq_unregister_driver(driver_data: current_pstate_driver); |
| 1850 | return ret; |
| 1851 | } |
| 1852 | device_initcall(amd_pstate_init); |
| 1853 | |
| 1854 | static int __init amd_pstate_param(char *str) |
| 1855 | { |
| 1856 | size_t size; |
| 1857 | int mode_idx; |
| 1858 | |
| 1859 | if (!str) |
| 1860 | return -EINVAL; |
| 1861 | |
| 1862 | size = strlen(str); |
| 1863 | mode_idx = get_mode_idx_from_str(str, size); |
| 1864 | |
| 1865 | return amd_pstate_set_driver(mode_idx); |
| 1866 | } |
| 1867 | |
| 1868 | static int __init amd_prefcore_param(char *str) |
| 1869 | { |
| 1870 | if (!strcmp(str, "disable" )) |
| 1871 | amd_pstate_prefcore = false; |
| 1872 | |
| 1873 | return 0; |
| 1874 | } |
| 1875 | |
/*
 * Hook the two handlers above up to the "amd_pstate" and "amd_prefcore"
 * kernel command-line options.
 */
early_param("amd_pstate" , amd_pstate_param);
early_param("amd_prefcore" , amd_prefcore_param);

/* Module metadata. */
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>" );
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver" );
| 1881 | |