1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | */ |
23 | |
24 | #include <linux/types.h> |
25 | #include <linux/kernel.h> |
26 | #include <linux/pci.h> |
27 | #include <linux/errno.h> |
28 | #include <linux/acpi.h> |
29 | #include <linux/hash.h> |
30 | #include <linux/cpufreq.h> |
31 | #include <linux/log2.h> |
32 | #include <linux/dmi.h> |
33 | #include <linux/atomic.h> |
34 | |
35 | #include "kfd_priv.h" |
36 | #include "kfd_crat.h" |
37 | #include "kfd_topology.h" |
38 | #include "kfd_device_queue_manager.h" |
39 | #include "kfd_svm.h" |
40 | #include "kfd_debug.h" |
41 | #include "amdgpu_amdkfd.h" |
42 | #include "amdgpu_ras.h" |
43 | #include "amdgpu.h" |
44 | |
/* topology_device_list - Master list of all topology devices */
/*
 * NOTE(review): declared without an initializer here; it presumably gets
 * INIT_LIST_HEAD()'d during topology init before first use - confirm.
 */
static struct list_head topology_device_list;
/*
 * System-wide topology properties. Also carries the sysfs kobjects and
 * attributes of the topology root ("topology", "nodes", "generation_id",
 * "system_properties").
 */
static struct kfd_system_properties sys_props;

/* Reader/writer semaphore held around walks of topology_device_list */
static DECLARE_RWSEM(topology_lock);
/* Not referenced in this part of the file. */
static uint32_t topology_crat_proximity_domain;
51 | |
52 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( |
53 | uint32_t proximity_domain) |
54 | { |
55 | struct kfd_topology_device *top_dev; |
56 | struct kfd_topology_device *device = NULL; |
57 | |
58 | list_for_each_entry(top_dev, &topology_device_list, list) |
59 | if (top_dev->proximity_domain == proximity_domain) { |
60 | device = top_dev; |
61 | break; |
62 | } |
63 | |
64 | return device; |
65 | } |
66 | |
67 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain( |
68 | uint32_t proximity_domain) |
69 | { |
70 | struct kfd_topology_device *device = NULL; |
71 | |
72 | down_read(sem: &topology_lock); |
73 | |
74 | device = kfd_topology_device_by_proximity_domain_no_lock( |
75 | proximity_domain); |
76 | up_read(sem: &topology_lock); |
77 | |
78 | return device; |
79 | } |
80 | |
81 | struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) |
82 | { |
83 | struct kfd_topology_device *top_dev = NULL; |
84 | struct kfd_topology_device *ret = NULL; |
85 | |
86 | down_read(sem: &topology_lock); |
87 | |
88 | list_for_each_entry(top_dev, &topology_device_list, list) |
89 | if (top_dev->gpu_id == gpu_id) { |
90 | ret = top_dev; |
91 | break; |
92 | } |
93 | |
94 | up_read(sem: &topology_lock); |
95 | |
96 | return ret; |
97 | } |
98 | |
99 | struct kfd_node *kfd_device_by_id(uint32_t gpu_id) |
100 | { |
101 | struct kfd_topology_device *top_dev; |
102 | |
103 | top_dev = kfd_topology_device_by_id(gpu_id); |
104 | if (!top_dev) |
105 | return NULL; |
106 | |
107 | return top_dev->gpu; |
108 | } |
109 | |
110 | struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) |
111 | { |
112 | struct kfd_topology_device *top_dev; |
113 | struct kfd_node *device = NULL; |
114 | |
115 | down_read(sem: &topology_lock); |
116 | |
117 | list_for_each_entry(top_dev, &topology_device_list, list) |
118 | if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { |
119 | device = top_dev->gpu; |
120 | break; |
121 | } |
122 | |
123 | up_read(sem: &topology_lock); |
124 | |
125 | return device; |
126 | } |
127 | |
128 | /* Called with write topology_lock acquired */ |
129 | static void kfd_release_topology_device(struct kfd_topology_device *dev) |
130 | { |
131 | struct kfd_mem_properties *mem; |
132 | struct kfd_cache_properties *cache; |
133 | struct kfd_iolink_properties *iolink; |
134 | struct kfd_iolink_properties *p2plink; |
135 | struct kfd_perf_properties *perf; |
136 | |
137 | list_del(entry: &dev->list); |
138 | |
139 | while (dev->mem_props.next != &dev->mem_props) { |
140 | mem = container_of(dev->mem_props.next, |
141 | struct kfd_mem_properties, list); |
142 | list_del(entry: &mem->list); |
143 | kfree(objp: mem); |
144 | } |
145 | |
146 | while (dev->cache_props.next != &dev->cache_props) { |
147 | cache = container_of(dev->cache_props.next, |
148 | struct kfd_cache_properties, list); |
149 | list_del(entry: &cache->list); |
150 | kfree(objp: cache); |
151 | } |
152 | |
153 | while (dev->io_link_props.next != &dev->io_link_props) { |
154 | iolink = container_of(dev->io_link_props.next, |
155 | struct kfd_iolink_properties, list); |
156 | list_del(entry: &iolink->list); |
157 | kfree(objp: iolink); |
158 | } |
159 | |
160 | while (dev->p2p_link_props.next != &dev->p2p_link_props) { |
161 | p2plink = container_of(dev->p2p_link_props.next, |
162 | struct kfd_iolink_properties, list); |
163 | list_del(entry: &p2plink->list); |
164 | kfree(objp: p2plink); |
165 | } |
166 | |
167 | while (dev->perf_props.next != &dev->perf_props) { |
168 | perf = container_of(dev->perf_props.next, |
169 | struct kfd_perf_properties, list); |
170 | list_del(entry: &perf->list); |
171 | kfree(objp: perf); |
172 | } |
173 | |
174 | kfree(objp: dev); |
175 | } |
176 | |
177 | void kfd_release_topology_device_list(struct list_head *device_list) |
178 | { |
179 | struct kfd_topology_device *dev; |
180 | |
181 | while (!list_empty(head: device_list)) { |
182 | dev = list_first_entry(device_list, |
183 | struct kfd_topology_device, list); |
184 | kfd_release_topology_device(dev); |
185 | } |
186 | } |
187 | |
188 | static void kfd_release_live_view(void) |
189 | { |
190 | kfd_release_topology_device_list(device_list: &topology_device_list); |
191 | memset(&sys_props, 0, sizeof(sys_props)); |
192 | } |
193 | |
194 | struct kfd_topology_device *kfd_create_topology_device( |
195 | struct list_head *device_list) |
196 | { |
197 | struct kfd_topology_device *dev; |
198 | |
199 | dev = kfd_alloc_struct(dev); |
200 | if (!dev) { |
201 | pr_err("No memory to allocate a topology device" ); |
202 | return NULL; |
203 | } |
204 | |
205 | INIT_LIST_HEAD(list: &dev->mem_props); |
206 | INIT_LIST_HEAD(list: &dev->cache_props); |
207 | INIT_LIST_HEAD(list: &dev->io_link_props); |
208 | INIT_LIST_HEAD(list: &dev->p2p_link_props); |
209 | INIT_LIST_HEAD(list: &dev->perf_props); |
210 | |
211 | list_add_tail(new: &dev->list, head: device_list); |
212 | |
213 | return dev; |
214 | } |
215 | |
216 | |
/*
 * Helpers for sysfs show() callbacks. Each one appends formatted text to
 * @buffer at offset @offs and advances @offs by the number of characters
 * actually stored. The expression evaluates to the updated @offs.
 *
 * scnprintf() is used rather than snprintf(): snprintf() returns the length
 * the output would have had without truncation, so summing its results
 * could move @offs past the end of the buffer. With scnprintf() @offs
 * always stays below PAGE_SIZE.
 *
 * @buffer is treated as PAGE_SIZE bytes long. @offs must be a modifiable
 * integer lvalue without side effects; it is evaluated more than once.
 */
#define sysfs_show_gen_prop(buffer, offs, fmt, ...)			\
	((offs) += scnprintf((buffer) + (offs), PAGE_SIZE - (offs),	\
			     fmt, __VA_ARGS__))
/* "<name> <value>\n" with an unsigned 32-bit value */
#define sysfs_show_32bit_prop(buffer, offs, name, value)		\
	sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
/* "<name> <value>\n" with an unsigned 64-bit value */
#define sysfs_show_64bit_prop(buffer, offs, name, value)		\
	sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
/* "<value>\n" with an unsigned 32-bit value */
#define sysfs_show_32bit_val(buffer, offs, value)			\
	sysfs_show_gen_prop(buffer, offs, "%u\n", value)
/* "<value>\n" with a string value */
#define sysfs_show_str_val(buffer, offs, value)				\
	sysfs_show_gen_prop(buffer, offs, "%s\n", value)
228 | |
229 | static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, |
230 | char *buffer) |
231 | { |
232 | int offs = 0; |
233 | |
234 | /* Making sure that the buffer is an empty string */ |
235 | buffer[0] = 0; |
236 | |
237 | if (attr == &sys_props.attr_genid) { |
238 | sysfs_show_32bit_val(buffer, offs, |
239 | sys_props.generation_count); |
240 | } else if (attr == &sys_props.attr_props) { |
241 | sysfs_show_64bit_prop(buffer, offs, "platform_oem" , |
242 | sys_props.platform_oem); |
243 | sysfs_show_64bit_prop(buffer, offs, "platform_id" , |
244 | sys_props.platform_id); |
245 | sysfs_show_64bit_prop(buffer, offs, "platform_rev" , |
246 | sys_props.platform_rev); |
247 | } else { |
248 | offs = -EINVAL; |
249 | } |
250 | |
251 | return offs; |
252 | } |
253 | |
/*
 * kobject release callback shared by the kobj_type definitions in this
 * file: it simply frees the kobject's memory.
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
258 | |
259 | static const struct sysfs_ops sysprops_ops = { |
260 | .show = sysprops_show, |
261 | }; |
262 | |
263 | static const struct kobj_type sysprops_type = { |
264 | .release = kfd_topology_kobj_release, |
265 | .sysfs_ops = &sysprops_ops, |
266 | }; |
267 | |
268 | static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, |
269 | char *buffer) |
270 | { |
271 | int offs = 0; |
272 | struct kfd_iolink_properties *iolink; |
273 | |
274 | /* Making sure that the buffer is an empty string */ |
275 | buffer[0] = 0; |
276 | |
277 | iolink = container_of(attr, struct kfd_iolink_properties, attr); |
278 | if (iolink->gpu && kfd_devcgroup_check_permission(kfd: iolink->gpu)) |
279 | return -EPERM; |
280 | sysfs_show_32bit_prop(buffer, offs, "type" , iolink->iolink_type); |
281 | sysfs_show_32bit_prop(buffer, offs, "version_major" , iolink->ver_maj); |
282 | sysfs_show_32bit_prop(buffer, offs, "version_minor" , iolink->ver_min); |
283 | sysfs_show_32bit_prop(buffer, offs, "node_from" , iolink->node_from); |
284 | sysfs_show_32bit_prop(buffer, offs, "node_to" , iolink->node_to); |
285 | sysfs_show_32bit_prop(buffer, offs, "weight" , iolink->weight); |
286 | sysfs_show_32bit_prop(buffer, offs, "min_latency" , iolink->min_latency); |
287 | sysfs_show_32bit_prop(buffer, offs, "max_latency" , iolink->max_latency); |
288 | sysfs_show_32bit_prop(buffer, offs, "min_bandwidth" , |
289 | iolink->min_bandwidth); |
290 | sysfs_show_32bit_prop(buffer, offs, "max_bandwidth" , |
291 | iolink->max_bandwidth); |
292 | sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size" , |
293 | iolink->rec_transfer_size); |
294 | sysfs_show_32bit_prop(buffer, offs, "flags" , iolink->flags); |
295 | |
296 | return offs; |
297 | } |
298 | |
299 | static const struct sysfs_ops iolink_ops = { |
300 | .show = iolink_show, |
301 | }; |
302 | |
303 | static const struct kobj_type iolink_type = { |
304 | .release = kfd_topology_kobj_release, |
305 | .sysfs_ops = &iolink_ops, |
306 | }; |
307 | |
308 | static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, |
309 | char *buffer) |
310 | { |
311 | int offs = 0; |
312 | struct kfd_mem_properties *mem; |
313 | |
314 | /* Making sure that the buffer is an empty string */ |
315 | buffer[0] = 0; |
316 | |
317 | mem = container_of(attr, struct kfd_mem_properties, attr); |
318 | if (mem->gpu && kfd_devcgroup_check_permission(kfd: mem->gpu)) |
319 | return -EPERM; |
320 | sysfs_show_32bit_prop(buffer, offs, "heap_type" , mem->heap_type); |
321 | sysfs_show_64bit_prop(buffer, offs, "size_in_bytes" , |
322 | mem->size_in_bytes); |
323 | sysfs_show_32bit_prop(buffer, offs, "flags" , mem->flags); |
324 | sysfs_show_32bit_prop(buffer, offs, "width" , mem->width); |
325 | sysfs_show_32bit_prop(buffer, offs, "mem_clk_max" , |
326 | mem->mem_clk_max); |
327 | |
328 | return offs; |
329 | } |
330 | |
331 | static const struct sysfs_ops mem_ops = { |
332 | .show = mem_show, |
333 | }; |
334 | |
335 | static const struct kobj_type mem_type = { |
336 | .release = kfd_topology_kobj_release, |
337 | .sysfs_ops = &mem_ops, |
338 | }; |
339 | |
340 | static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, |
341 | char *buffer) |
342 | { |
343 | int offs = 0; |
344 | uint32_t i, j; |
345 | struct kfd_cache_properties *cache; |
346 | |
347 | /* Making sure that the buffer is an empty string */ |
348 | buffer[0] = 0; |
349 | cache = container_of(attr, struct kfd_cache_properties, attr); |
350 | if (cache->gpu && kfd_devcgroup_check_permission(kfd: cache->gpu)) |
351 | return -EPERM; |
352 | sysfs_show_32bit_prop(buffer, offs, "processor_id_low" , |
353 | cache->processor_id_low); |
354 | sysfs_show_32bit_prop(buffer, offs, "level" , cache->cache_level); |
355 | sysfs_show_32bit_prop(buffer, offs, "size" , cache->cache_size); |
356 | sysfs_show_32bit_prop(buffer, offs, "cache_line_size" , |
357 | cache->cacheline_size); |
358 | sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag" , |
359 | cache->cachelines_per_tag); |
360 | sysfs_show_32bit_prop(buffer, offs, "association" , cache->cache_assoc); |
361 | sysfs_show_32bit_prop(buffer, offs, "latency" , cache->cache_latency); |
362 | sysfs_show_32bit_prop(buffer, offs, "type" , cache->cache_type); |
363 | |
364 | offs += snprintf(buf: buffer+offs, PAGE_SIZE-offs, fmt: "sibling_map " ); |
365 | for (i = 0; i < cache->sibling_map_size; i++) |
366 | for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) |
367 | /* Check each bit */ |
368 | offs += snprintf(buf: buffer+offs, PAGE_SIZE-offs, fmt: "%d," , |
369 | (cache->sibling_map[i] >> j) & 1); |
370 | |
371 | /* Replace the last "," with end of line */ |
372 | buffer[offs-1] = '\n'; |
373 | return offs; |
374 | } |
375 | |
376 | static const struct sysfs_ops cache_ops = { |
377 | .show = kfd_cache_show, |
378 | }; |
379 | |
380 | static const struct kobj_type cache_type = { |
381 | .release = kfd_topology_kobj_release, |
382 | .sysfs_ops = &cache_ops, |
383 | }; |
384 | |
/****** Sysfs of Performance Counters ******/

/*
 * A performance-counter sysfs attribute together with the value it reports.
 * perf_show() recovers this structure from the embedded kobj_attribute and
 * prints @data; a @data of 0 is treated as "nothing to show".
 */
struct kfd_perf_attr {
	struct kobj_attribute attr;
	uint32_t data;
};
391 | |
392 | static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, |
393 | char *buf) |
394 | { |
395 | int offs = 0; |
396 | struct kfd_perf_attr *attr; |
397 | |
398 | buf[0] = 0; |
399 | attr = container_of(attrs, struct kfd_perf_attr, attr); |
400 | if (!attr->data) /* invalid data for PMC */ |
401 | return 0; |
402 | else |
403 | return sysfs_show_32bit_val(buf, offs, attr->data); |
404 | } |
405 | |
/*
 * Initializer for one struct kfd_perf_attr: a read-only (0444) attribute
 * named @_name, served by perf_show(), with initial value @_data.
 */
#define KFD_PERF_DESC(_name, _data)			\
{							\
	.attr  = __ATTR(_name, 0444, perf_show, NULL),	\
	.data = _data,					\
}

/*
 * Attributes exposed for the "iommu" perf block. All start at 0, which
 * perf_show() renders as empty output. kfd_build_sysfs_node_entry() fills
 * in entry 0 (max_concurrent) only.
 */
static struct kfd_perf_attr perf_attr_iommu[] = {
	KFD_PERF_DESC(max_concurrent, 0),
	KFD_PERF_DESC(num_counters, 0),
	KFD_PERF_DESC(counter_ids, 0),
};
/****************************************/
418 | |
419 | static ssize_t node_show(struct kobject *kobj, struct attribute *attr, |
420 | char *buffer) |
421 | { |
422 | int offs = 0; |
423 | struct kfd_topology_device *dev; |
424 | uint32_t log_max_watch_addr; |
425 | |
426 | /* Making sure that the buffer is an empty string */ |
427 | buffer[0] = 0; |
428 | |
429 | if (strcmp(attr->name, "gpu_id" ) == 0) { |
430 | dev = container_of(attr, struct kfd_topology_device, |
431 | attr_gpuid); |
432 | if (dev->gpu && kfd_devcgroup_check_permission(kfd: dev->gpu)) |
433 | return -EPERM; |
434 | return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); |
435 | } |
436 | |
437 | if (strcmp(attr->name, "name" ) == 0) { |
438 | dev = container_of(attr, struct kfd_topology_device, |
439 | attr_name); |
440 | |
441 | if (dev->gpu && kfd_devcgroup_check_permission(kfd: dev->gpu)) |
442 | return -EPERM; |
443 | return sysfs_show_str_val(buffer, offs, dev->node_props.name); |
444 | } |
445 | |
446 | dev = container_of(attr, struct kfd_topology_device, |
447 | attr_props); |
448 | if (dev->gpu && kfd_devcgroup_check_permission(kfd: dev->gpu)) |
449 | return -EPERM; |
450 | sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count" , |
451 | dev->node_props.cpu_cores_count); |
452 | sysfs_show_32bit_prop(buffer, offs, "simd_count" , |
453 | dev->gpu ? dev->node_props.simd_count : 0); |
454 | sysfs_show_32bit_prop(buffer, offs, "mem_banks_count" , |
455 | dev->node_props.mem_banks_count); |
456 | sysfs_show_32bit_prop(buffer, offs, "caches_count" , |
457 | dev->node_props.caches_count); |
458 | sysfs_show_32bit_prop(buffer, offs, "io_links_count" , |
459 | dev->node_props.io_links_count); |
460 | sysfs_show_32bit_prop(buffer, offs, "p2p_links_count" , |
461 | dev->node_props.p2p_links_count); |
462 | sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base" , |
463 | dev->node_props.cpu_core_id_base); |
464 | sysfs_show_32bit_prop(buffer, offs, "simd_id_base" , |
465 | dev->node_props.simd_id_base); |
466 | sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd" , |
467 | dev->node_props.max_waves_per_simd); |
468 | sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb" , |
469 | dev->node_props.lds_size_in_kb); |
470 | sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb" , |
471 | dev->node_props.gds_size_in_kb); |
472 | sysfs_show_32bit_prop(buffer, offs, "num_gws" , |
473 | dev->node_props.num_gws); |
474 | sysfs_show_32bit_prop(buffer, offs, "wave_front_size" , |
475 | dev->node_props.wave_front_size); |
476 | sysfs_show_32bit_prop(buffer, offs, "array_count" , |
477 | dev->gpu ? (dev->node_props.array_count * |
478 | NUM_XCC(dev->gpu->xcc_mask)) : 0); |
479 | sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine" , |
480 | dev->node_props.simd_arrays_per_engine); |
481 | sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array" , |
482 | dev->node_props.cu_per_simd_array); |
483 | sysfs_show_32bit_prop(buffer, offs, "simd_per_cu" , |
484 | dev->node_props.simd_per_cu); |
485 | sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu" , |
486 | dev->node_props.max_slots_scratch_cu); |
487 | sysfs_show_32bit_prop(buffer, offs, "gfx_target_version" , |
488 | dev->node_props.gfx_target_version); |
489 | sysfs_show_32bit_prop(buffer, offs, "vendor_id" , |
490 | dev->node_props.vendor_id); |
491 | sysfs_show_32bit_prop(buffer, offs, "device_id" , |
492 | dev->node_props.device_id); |
493 | sysfs_show_32bit_prop(buffer, offs, "location_id" , |
494 | dev->node_props.location_id); |
495 | sysfs_show_32bit_prop(buffer, offs, "domain" , |
496 | dev->node_props.domain); |
497 | sysfs_show_32bit_prop(buffer, offs, "drm_render_minor" , |
498 | dev->node_props.drm_render_minor); |
499 | sysfs_show_64bit_prop(buffer, offs, "hive_id" , |
500 | dev->node_props.hive_id); |
501 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines" , |
502 | dev->node_props.num_sdma_engines); |
503 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines" , |
504 | dev->node_props.num_sdma_xgmi_engines); |
505 | sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine" , |
506 | dev->node_props.num_sdma_queues_per_engine); |
507 | sysfs_show_32bit_prop(buffer, offs, "num_cp_queues" , |
508 | dev->node_props.num_cp_queues); |
509 | |
510 | if (dev->gpu) { |
511 | log_max_watch_addr = |
512 | __ilog2_u32(n: dev->gpu->kfd->device_info.num_of_watch_points); |
513 | |
514 | if (log_max_watch_addr) { |
515 | dev->node_props.capability |= |
516 | HSA_CAP_WATCH_POINTS_SUPPORTED; |
517 | |
518 | dev->node_props.capability |= |
519 | ((log_max_watch_addr << |
520 | HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & |
521 | HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); |
522 | } |
523 | |
524 | if (dev->gpu->adev->asic_type == CHIP_TONGA) |
525 | dev->node_props.capability |= |
526 | HSA_CAP_AQL_QUEUE_DOUBLE_MAP; |
527 | |
528 | sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute" , |
529 | dev->node_props.max_engine_clk_fcompute); |
530 | |
531 | sysfs_show_64bit_prop(buffer, offs, "local_mem_size" , 0ULL); |
532 | |
533 | sysfs_show_32bit_prop(buffer, offs, "fw_version" , |
534 | dev->gpu->kfd->mec_fw_version); |
535 | sysfs_show_32bit_prop(buffer, offs, "capability" , |
536 | dev->node_props.capability); |
537 | sysfs_show_64bit_prop(buffer, offs, "debug_prop" , |
538 | dev->node_props.debug_prop); |
539 | sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version" , |
540 | dev->gpu->kfd->sdma_fw_version); |
541 | sysfs_show_64bit_prop(buffer, offs, "unique_id" , |
542 | dev->gpu->adev->unique_id); |
543 | sysfs_show_32bit_prop(buffer, offs, "num_xcc" , |
544 | NUM_XCC(dev->gpu->xcc_mask)); |
545 | } |
546 | |
547 | return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute" , |
548 | cpufreq_quick_get_max(0)/1000); |
549 | } |
550 | |
551 | static const struct sysfs_ops node_ops = { |
552 | .show = node_show, |
553 | }; |
554 | |
555 | static const struct kobj_type node_type = { |
556 | .release = kfd_topology_kobj_release, |
557 | .sysfs_ops = &node_ops, |
558 | }; |
559 | |
/*
 * Tear down a single-attribute kobject: remove @attr from @kobj, unlink
 * @kobj from sysfs and drop the reference held on it.
 *
 * Callers clear their own pointer to @kobj afterwards.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
566 | |
567 | static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) |
568 | { |
569 | struct kfd_iolink_properties *p2plink; |
570 | struct kfd_iolink_properties *iolink; |
571 | struct kfd_cache_properties *cache; |
572 | struct kfd_mem_properties *mem; |
573 | struct kfd_perf_properties *perf; |
574 | |
575 | if (dev->kobj_iolink) { |
576 | list_for_each_entry(iolink, &dev->io_link_props, list) |
577 | if (iolink->kobj) { |
578 | kfd_remove_sysfs_file(kobj: iolink->kobj, |
579 | attr: &iolink->attr); |
580 | iolink->kobj = NULL; |
581 | } |
582 | kobject_del(kobj: dev->kobj_iolink); |
583 | kobject_put(kobj: dev->kobj_iolink); |
584 | dev->kobj_iolink = NULL; |
585 | } |
586 | |
587 | if (dev->kobj_p2plink) { |
588 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) |
589 | if (p2plink->kobj) { |
590 | kfd_remove_sysfs_file(kobj: p2plink->kobj, |
591 | attr: &p2plink->attr); |
592 | p2plink->kobj = NULL; |
593 | } |
594 | kobject_del(kobj: dev->kobj_p2plink); |
595 | kobject_put(kobj: dev->kobj_p2plink); |
596 | dev->kobj_p2plink = NULL; |
597 | } |
598 | |
599 | if (dev->kobj_cache) { |
600 | list_for_each_entry(cache, &dev->cache_props, list) |
601 | if (cache->kobj) { |
602 | kfd_remove_sysfs_file(kobj: cache->kobj, |
603 | attr: &cache->attr); |
604 | cache->kobj = NULL; |
605 | } |
606 | kobject_del(kobj: dev->kobj_cache); |
607 | kobject_put(kobj: dev->kobj_cache); |
608 | dev->kobj_cache = NULL; |
609 | } |
610 | |
611 | if (dev->kobj_mem) { |
612 | list_for_each_entry(mem, &dev->mem_props, list) |
613 | if (mem->kobj) { |
614 | kfd_remove_sysfs_file(kobj: mem->kobj, attr: &mem->attr); |
615 | mem->kobj = NULL; |
616 | } |
617 | kobject_del(kobj: dev->kobj_mem); |
618 | kobject_put(kobj: dev->kobj_mem); |
619 | dev->kobj_mem = NULL; |
620 | } |
621 | |
622 | if (dev->kobj_perf) { |
623 | list_for_each_entry(perf, &dev->perf_props, list) { |
624 | kfree(objp: perf->attr_group); |
625 | perf->attr_group = NULL; |
626 | } |
627 | kobject_del(kobj: dev->kobj_perf); |
628 | kobject_put(kobj: dev->kobj_perf); |
629 | dev->kobj_perf = NULL; |
630 | } |
631 | |
632 | if (dev->kobj_node) { |
633 | sysfs_remove_file(kobj: dev->kobj_node, attr: &dev->attr_gpuid); |
634 | sysfs_remove_file(kobj: dev->kobj_node, attr: &dev->attr_name); |
635 | sysfs_remove_file(kobj: dev->kobj_node, attr: &dev->attr_props); |
636 | kobject_del(kobj: dev->kobj_node); |
637 | kobject_put(kobj: dev->kobj_node); |
638 | dev->kobj_node = NULL; |
639 | } |
640 | } |
641 | |
642 | static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, |
643 | uint32_t id) |
644 | { |
645 | struct kfd_iolink_properties *p2plink; |
646 | struct kfd_iolink_properties *iolink; |
647 | struct kfd_cache_properties *cache; |
648 | struct kfd_mem_properties *mem; |
649 | struct kfd_perf_properties *perf; |
650 | int ret; |
651 | uint32_t i, num_attrs; |
652 | struct attribute **attrs; |
653 | |
654 | if (WARN_ON(dev->kobj_node)) |
655 | return -EEXIST; |
656 | |
657 | /* |
658 | * Creating the sysfs folders |
659 | */ |
660 | dev->kobj_node = kfd_alloc_struct(dev->kobj_node); |
661 | if (!dev->kobj_node) |
662 | return -ENOMEM; |
663 | |
664 | ret = kobject_init_and_add(kobj: dev->kobj_node, ktype: &node_type, |
665 | parent: sys_props.kobj_nodes, fmt: "%d" , id); |
666 | if (ret < 0) { |
667 | kobject_put(kobj: dev->kobj_node); |
668 | return ret; |
669 | } |
670 | |
671 | dev->kobj_mem = kobject_create_and_add(name: "mem_banks" , parent: dev->kobj_node); |
672 | if (!dev->kobj_mem) |
673 | return -ENOMEM; |
674 | |
675 | dev->kobj_cache = kobject_create_and_add(name: "caches" , parent: dev->kobj_node); |
676 | if (!dev->kobj_cache) |
677 | return -ENOMEM; |
678 | |
679 | dev->kobj_iolink = kobject_create_and_add(name: "io_links" , parent: dev->kobj_node); |
680 | if (!dev->kobj_iolink) |
681 | return -ENOMEM; |
682 | |
683 | dev->kobj_p2plink = kobject_create_and_add(name: "p2p_links" , parent: dev->kobj_node); |
684 | if (!dev->kobj_p2plink) |
685 | return -ENOMEM; |
686 | |
687 | dev->kobj_perf = kobject_create_and_add(name: "perf" , parent: dev->kobj_node); |
688 | if (!dev->kobj_perf) |
689 | return -ENOMEM; |
690 | |
691 | /* |
692 | * Creating sysfs files for node properties |
693 | */ |
694 | dev->attr_gpuid.name = "gpu_id" ; |
695 | dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; |
696 | sysfs_attr_init(&dev->attr_gpuid); |
697 | dev->attr_name.name = "name" ; |
698 | dev->attr_name.mode = KFD_SYSFS_FILE_MODE; |
699 | sysfs_attr_init(&dev->attr_name); |
700 | dev->attr_props.name = "properties" ; |
701 | dev->attr_props.mode = KFD_SYSFS_FILE_MODE; |
702 | sysfs_attr_init(&dev->attr_props); |
703 | ret = sysfs_create_file(kobj: dev->kobj_node, attr: &dev->attr_gpuid); |
704 | if (ret < 0) |
705 | return ret; |
706 | ret = sysfs_create_file(kobj: dev->kobj_node, attr: &dev->attr_name); |
707 | if (ret < 0) |
708 | return ret; |
709 | ret = sysfs_create_file(kobj: dev->kobj_node, attr: &dev->attr_props); |
710 | if (ret < 0) |
711 | return ret; |
712 | |
713 | i = 0; |
714 | list_for_each_entry(mem, &dev->mem_props, list) { |
715 | mem->kobj = kzalloc(size: sizeof(struct kobject), GFP_KERNEL); |
716 | if (!mem->kobj) |
717 | return -ENOMEM; |
718 | ret = kobject_init_and_add(kobj: mem->kobj, ktype: &mem_type, |
719 | parent: dev->kobj_mem, fmt: "%d" , i); |
720 | if (ret < 0) { |
721 | kobject_put(kobj: mem->kobj); |
722 | return ret; |
723 | } |
724 | |
725 | mem->attr.name = "properties" ; |
726 | mem->attr.mode = KFD_SYSFS_FILE_MODE; |
727 | sysfs_attr_init(&mem->attr); |
728 | ret = sysfs_create_file(kobj: mem->kobj, attr: &mem->attr); |
729 | if (ret < 0) |
730 | return ret; |
731 | i++; |
732 | } |
733 | |
734 | i = 0; |
735 | list_for_each_entry(cache, &dev->cache_props, list) { |
736 | cache->kobj = kzalloc(size: sizeof(struct kobject), GFP_KERNEL); |
737 | if (!cache->kobj) |
738 | return -ENOMEM; |
739 | ret = kobject_init_and_add(kobj: cache->kobj, ktype: &cache_type, |
740 | parent: dev->kobj_cache, fmt: "%d" , i); |
741 | if (ret < 0) { |
742 | kobject_put(kobj: cache->kobj); |
743 | return ret; |
744 | } |
745 | |
746 | cache->attr.name = "properties" ; |
747 | cache->attr.mode = KFD_SYSFS_FILE_MODE; |
748 | sysfs_attr_init(&cache->attr); |
749 | ret = sysfs_create_file(kobj: cache->kobj, attr: &cache->attr); |
750 | if (ret < 0) |
751 | return ret; |
752 | i++; |
753 | } |
754 | |
755 | i = 0; |
756 | list_for_each_entry(iolink, &dev->io_link_props, list) { |
757 | iolink->kobj = kzalloc(size: sizeof(struct kobject), GFP_KERNEL); |
758 | if (!iolink->kobj) |
759 | return -ENOMEM; |
760 | ret = kobject_init_and_add(kobj: iolink->kobj, ktype: &iolink_type, |
761 | parent: dev->kobj_iolink, fmt: "%d" , i); |
762 | if (ret < 0) { |
763 | kobject_put(kobj: iolink->kobj); |
764 | return ret; |
765 | } |
766 | |
767 | iolink->attr.name = "properties" ; |
768 | iolink->attr.mode = KFD_SYSFS_FILE_MODE; |
769 | sysfs_attr_init(&iolink->attr); |
770 | ret = sysfs_create_file(kobj: iolink->kobj, attr: &iolink->attr); |
771 | if (ret < 0) |
772 | return ret; |
773 | i++; |
774 | } |
775 | |
776 | i = 0; |
777 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) { |
778 | p2plink->kobj = kzalloc(size: sizeof(struct kobject), GFP_KERNEL); |
779 | if (!p2plink->kobj) |
780 | return -ENOMEM; |
781 | ret = kobject_init_and_add(kobj: p2plink->kobj, ktype: &iolink_type, |
782 | parent: dev->kobj_p2plink, fmt: "%d" , i); |
783 | if (ret < 0) { |
784 | kobject_put(kobj: p2plink->kobj); |
785 | return ret; |
786 | } |
787 | |
788 | p2plink->attr.name = "properties" ; |
789 | p2plink->attr.mode = KFD_SYSFS_FILE_MODE; |
790 | sysfs_attr_init(&p2plink->attr); |
791 | ret = sysfs_create_file(kobj: p2plink->kobj, attr: &p2plink->attr); |
792 | if (ret < 0) |
793 | return ret; |
794 | i++; |
795 | } |
796 | |
797 | /* All hardware blocks have the same number of attributes. */ |
798 | num_attrs = ARRAY_SIZE(perf_attr_iommu); |
799 | list_for_each_entry(perf, &dev->perf_props, list) { |
800 | perf->attr_group = kzalloc(size: sizeof(struct kfd_perf_attr) |
801 | * num_attrs + sizeof(struct attribute_group), |
802 | GFP_KERNEL); |
803 | if (!perf->attr_group) |
804 | return -ENOMEM; |
805 | |
806 | attrs = (struct attribute **)(perf->attr_group + 1); |
807 | if (!strcmp(perf->block_name, "iommu" )) { |
808 | /* Information of IOMMU's num_counters and counter_ids is shown |
809 | * under /sys/bus/event_source/devices/amd_iommu. We don't |
810 | * duplicate here. |
811 | */ |
812 | perf_attr_iommu[0].data = perf->max_concurrent; |
813 | for (i = 0; i < num_attrs; i++) |
814 | attrs[i] = &perf_attr_iommu[i].attr.attr; |
815 | } |
816 | perf->attr_group->name = perf->block_name; |
817 | perf->attr_group->attrs = attrs; |
818 | ret = sysfs_create_group(kobj: dev->kobj_perf, grp: perf->attr_group); |
819 | if (ret < 0) |
820 | return ret; |
821 | } |
822 | |
823 | return 0; |
824 | } |
825 | |
826 | /* Called with write topology lock acquired */ |
827 | static int kfd_build_sysfs_node_tree(void) |
828 | { |
829 | struct kfd_topology_device *dev; |
830 | int ret; |
831 | uint32_t i = 0; |
832 | |
833 | list_for_each_entry(dev, &topology_device_list, list) { |
834 | ret = kfd_build_sysfs_node_entry(dev, id: i); |
835 | if (ret < 0) |
836 | return ret; |
837 | i++; |
838 | } |
839 | |
840 | return 0; |
841 | } |
842 | |
843 | /* Called with write topology lock acquired */ |
844 | static void kfd_remove_sysfs_node_tree(void) |
845 | { |
846 | struct kfd_topology_device *dev; |
847 | |
848 | list_for_each_entry(dev, &topology_device_list, list) |
849 | kfd_remove_sysfs_node_entry(dev); |
850 | } |
851 | |
852 | static int kfd_topology_update_sysfs(void) |
853 | { |
854 | int ret; |
855 | |
856 | if (!sys_props.kobj_topology) { |
857 | sys_props.kobj_topology = |
858 | kfd_alloc_struct(sys_props.kobj_topology); |
859 | if (!sys_props.kobj_topology) |
860 | return -ENOMEM; |
861 | |
862 | ret = kobject_init_and_add(kobj: sys_props.kobj_topology, |
863 | ktype: &sysprops_type, parent: &kfd_device->kobj, |
864 | fmt: "topology" ); |
865 | if (ret < 0) { |
866 | kobject_put(kobj: sys_props.kobj_topology); |
867 | return ret; |
868 | } |
869 | |
870 | sys_props.kobj_nodes = kobject_create_and_add(name: "nodes" , |
871 | parent: sys_props.kobj_topology); |
872 | if (!sys_props.kobj_nodes) |
873 | return -ENOMEM; |
874 | |
875 | sys_props.attr_genid.name = "generation_id" ; |
876 | sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; |
877 | sysfs_attr_init(&sys_props.attr_genid); |
878 | ret = sysfs_create_file(kobj: sys_props.kobj_topology, |
879 | attr: &sys_props.attr_genid); |
880 | if (ret < 0) |
881 | return ret; |
882 | |
883 | sys_props.attr_props.name = "system_properties" ; |
884 | sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; |
885 | sysfs_attr_init(&sys_props.attr_props); |
886 | ret = sysfs_create_file(kobj: sys_props.kobj_topology, |
887 | attr: &sys_props.attr_props); |
888 | if (ret < 0) |
889 | return ret; |
890 | } |
891 | |
892 | kfd_remove_sysfs_node_tree(); |
893 | |
894 | return kfd_build_sysfs_node_tree(); |
895 | } |
896 | |
897 | static void kfd_topology_release_sysfs(void) |
898 | { |
899 | kfd_remove_sysfs_node_tree(); |
900 | if (sys_props.kobj_topology) { |
901 | sysfs_remove_file(kobj: sys_props.kobj_topology, |
902 | attr: &sys_props.attr_genid); |
903 | sysfs_remove_file(kobj: sys_props.kobj_topology, |
904 | attr: &sys_props.attr_props); |
905 | if (sys_props.kobj_nodes) { |
906 | kobject_del(kobj: sys_props.kobj_nodes); |
907 | kobject_put(kobj: sys_props.kobj_nodes); |
908 | sys_props.kobj_nodes = NULL; |
909 | } |
910 | kobject_del(kobj: sys_props.kobj_topology); |
911 | kobject_put(kobj: sys_props.kobj_topology); |
912 | sys_props.kobj_topology = NULL; |
913 | } |
914 | } |
915 | |
916 | /* Called with write topology_lock acquired */ |
917 | static void kfd_topology_update_device_list(struct list_head *temp_list, |
918 | struct list_head *master_list) |
919 | { |
920 | while (!list_empty(head: temp_list)) { |
921 | list_move_tail(list: temp_list->next, head: master_list); |
922 | sys_props.num_devices++; |
923 | } |
924 | } |
925 | |
926 | static void kfd_debug_print_topology(void) |
927 | { |
928 | struct kfd_topology_device *dev; |
929 | |
930 | down_read(sem: &topology_lock); |
931 | |
932 | dev = list_last_entry(&topology_device_list, |
933 | struct kfd_topology_device, list); |
934 | if (dev) { |
935 | if (dev->node_props.cpu_cores_count && |
936 | dev->node_props.simd_count) { |
937 | pr_info("Topology: Add APU node [0x%0x:0x%0x]\n" , |
938 | dev->node_props.device_id, |
939 | dev->node_props.vendor_id); |
940 | } else if (dev->node_props.cpu_cores_count) |
941 | pr_info("Topology: Add CPU node\n" ); |
942 | else if (dev->node_props.simd_count) |
943 | pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n" , |
944 | dev->node_props.device_id, |
945 | dev->node_props.vendor_id); |
946 | } |
947 | up_read(sem: &topology_lock); |
948 | } |
949 | |
950 | /* Helper function for intializing platform_xx members of |
951 | * kfd_system_properties. Uses OEM info from the last CPU/APU node. |
952 | */ |
953 | static void kfd_update_system_properties(void) |
954 | { |
955 | struct kfd_topology_device *dev; |
956 | |
957 | down_read(sem: &topology_lock); |
958 | dev = list_last_entry(&topology_device_list, |
959 | struct kfd_topology_device, list); |
960 | if (dev) { |
961 | sys_props.platform_id = |
962 | (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; |
963 | sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); |
964 | sys_props.platform_rev = dev->oem_revision; |
965 | } |
966 | up_read(sem: &topology_lock); |
967 | } |
968 | |
969 | static void find_system_memory(const struct dmi_header *dm, |
970 | void *private) |
971 | { |
972 | struct kfd_mem_properties *mem; |
973 | u16 mem_width, mem_clock; |
974 | struct kfd_topology_device *kdev = |
975 | (struct kfd_topology_device *)private; |
976 | const u8 *dmi_data = (const u8 *)(dm + 1); |
977 | |
978 | if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { |
979 | mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); |
980 | mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); |
981 | list_for_each_entry(mem, &kdev->mem_props, list) { |
982 | if (mem_width != 0xFFFF && mem_width != 0) |
983 | mem->width = mem_width; |
984 | if (mem_clock != 0) |
985 | mem->mem_clk_max = mem_clock; |
986 | } |
987 | } |
988 | } |
989 | |
990 | /* kfd_add_non_crat_information - Add information that is not currently |
991 | * defined in CRAT but is necessary for KFD topology |
992 | * @dev - topology device to which addition info is added |
993 | */ |
994 | static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) |
995 | { |
996 | /* Check if CPU only node. */ |
997 | if (!kdev->gpu) { |
998 | /* Add system memory information */ |
999 | dmi_walk(decode: find_system_memory, private_data: kdev); |
1000 | } |
1001 | /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ |
1002 | } |
1003 | |
1004 | int kfd_topology_init(void) |
1005 | { |
1006 | void *crat_image = NULL; |
1007 | size_t image_size = 0; |
1008 | int ret; |
1009 | struct list_head temp_topology_device_list; |
1010 | int cpu_only_node = 0; |
1011 | struct kfd_topology_device *kdev; |
1012 | int proximity_domain; |
1013 | |
1014 | /* topology_device_list - Master list of all topology devices |
1015 | * temp_topology_device_list - temporary list created while parsing CRAT |
1016 | * or VCRAT. Once parsing is complete the contents of list is moved to |
1017 | * topology_device_list |
1018 | */ |
1019 | |
1020 | /* Initialize the head for the both the lists */ |
1021 | INIT_LIST_HEAD(list: &topology_device_list); |
1022 | INIT_LIST_HEAD(list: &temp_topology_device_list); |
1023 | init_rwsem(&topology_lock); |
1024 | |
1025 | memset(&sys_props, 0, sizeof(sys_props)); |
1026 | |
1027 | /* Proximity domains in ACPI CRAT tables start counting at |
1028 | * 0. The same should be true for virtual CRAT tables created |
1029 | * at this stage. GPUs added later in kfd_topology_add_device |
1030 | * use a counter. |
1031 | */ |
1032 | proximity_domain = 0; |
1033 | |
1034 | ret = kfd_create_crat_image_virtual(crat_image: &crat_image, size: &image_size, |
1035 | COMPUTE_UNIT_CPU, NULL, |
1036 | proximity_domain); |
1037 | cpu_only_node = 1; |
1038 | if (ret) { |
1039 | pr_err("Error creating VCRAT table for CPU\n" ); |
1040 | return ret; |
1041 | } |
1042 | |
1043 | ret = kfd_parse_crat_table(crat_image, |
1044 | device_list: &temp_topology_device_list, |
1045 | proximity_domain); |
1046 | if (ret) { |
1047 | pr_err("Error parsing VCRAT table for CPU\n" ); |
1048 | goto err; |
1049 | } |
1050 | |
1051 | kdev = list_first_entry(&temp_topology_device_list, |
1052 | struct kfd_topology_device, list); |
1053 | |
1054 | down_write(sem: &topology_lock); |
1055 | kfd_topology_update_device_list(temp_list: &temp_topology_device_list, |
1056 | master_list: &topology_device_list); |
1057 | topology_crat_proximity_domain = sys_props.num_devices-1; |
1058 | ret = kfd_topology_update_sysfs(); |
1059 | up_write(sem: &topology_lock); |
1060 | |
1061 | if (!ret) { |
1062 | sys_props.generation_count++; |
1063 | kfd_update_system_properties(); |
1064 | kfd_debug_print_topology(); |
1065 | } else |
1066 | pr_err("Failed to update topology in sysfs ret=%d\n" , ret); |
1067 | |
1068 | /* For nodes with GPU, this information gets added |
1069 | * when GPU is detected (kfd_topology_add_device). |
1070 | */ |
1071 | if (cpu_only_node) { |
1072 | /* Add additional information to CPU only node created above */ |
1073 | down_write(sem: &topology_lock); |
1074 | kdev = list_first_entry(&topology_device_list, |
1075 | struct kfd_topology_device, list); |
1076 | up_write(sem: &topology_lock); |
1077 | kfd_add_non_crat_information(kdev); |
1078 | } |
1079 | |
1080 | err: |
1081 | kfd_destroy_crat_image(crat_image); |
1082 | return ret; |
1083 | } |
1084 | |
1085 | void kfd_topology_shutdown(void) |
1086 | { |
1087 | down_write(sem: &topology_lock); |
1088 | kfd_topology_release_sysfs(); |
1089 | kfd_release_live_view(); |
1090 | up_write(sem: &topology_lock); |
1091 | } |
1092 | |
1093 | static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) |
1094 | { |
1095 | uint32_t hashout; |
1096 | uint32_t buf[8]; |
1097 | uint64_t local_mem_size; |
1098 | int i; |
1099 | |
1100 | if (!gpu) |
1101 | return 0; |
1102 | |
1103 | local_mem_size = gpu->local_mem_info.local_mem_size_private + |
1104 | gpu->local_mem_info.local_mem_size_public; |
1105 | buf[0] = gpu->adev->pdev->devfn; |
1106 | buf[1] = gpu->adev->pdev->subsystem_vendor | |
1107 | (gpu->adev->pdev->subsystem_device << 16); |
1108 | buf[2] = pci_domain_nr(bus: gpu->adev->pdev->bus); |
1109 | buf[3] = gpu->adev->pdev->device; |
1110 | buf[4] = gpu->adev->pdev->bus->number; |
1111 | buf[5] = lower_32_bits(local_mem_size); |
1112 | buf[6] = upper_32_bits(local_mem_size); |
1113 | buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); |
1114 | |
1115 | for (i = 0, hashout = 0; i < 8; i++) |
1116 | hashout ^= hash_32(val: buf[i], KFD_GPU_ID_HASH_WIDTH); |
1117 | |
1118 | return hashout; |
1119 | } |
1120 | /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If |
1121 | * the GPU device is not already present in the topology device |
1122 | * list then return NULL. This means a new topology device has to |
1123 | * be created for this GPU. |
1124 | */ |
1125 | static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) |
1126 | { |
1127 | struct kfd_topology_device *dev; |
1128 | struct kfd_topology_device *out_dev = NULL; |
1129 | struct kfd_mem_properties *mem; |
1130 | struct kfd_cache_properties *cache; |
1131 | struct kfd_iolink_properties *iolink; |
1132 | struct kfd_iolink_properties *p2plink; |
1133 | |
1134 | list_for_each_entry(dev, &topology_device_list, list) { |
1135 | /* Discrete GPUs need their own topology device list |
1136 | * entries. Don't assign them to CPU/APU nodes. |
1137 | */ |
1138 | if (dev->node_props.cpu_cores_count) |
1139 | continue; |
1140 | |
1141 | if (!dev->gpu && (dev->node_props.simd_count > 0)) { |
1142 | dev->gpu = gpu; |
1143 | out_dev = dev; |
1144 | |
1145 | list_for_each_entry(mem, &dev->mem_props, list) |
1146 | mem->gpu = dev->gpu; |
1147 | list_for_each_entry(cache, &dev->cache_props, list) |
1148 | cache->gpu = dev->gpu; |
1149 | list_for_each_entry(iolink, &dev->io_link_props, list) |
1150 | iolink->gpu = dev->gpu; |
1151 | list_for_each_entry(p2plink, &dev->p2p_link_props, list) |
1152 | p2plink->gpu = dev->gpu; |
1153 | break; |
1154 | } |
1155 | } |
1156 | return out_dev; |
1157 | } |
1158 | |
/* kfd_notify_gpu_change - Placeholder hook for a GPU arrival or removal.
 * @gpu_id - ID of the GPU that changed
 * @arrival - presumably non-zero for arrival and zero for removal; not
 *		used yet, confirm once this is implemented
 *
 * Currently does nothing.
 */
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/* TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
1166 | |
1167 | /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, |
1168 | * patch this after CRAT parsing. |
1169 | */ |
1170 | static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) |
1171 | { |
1172 | struct kfd_mem_properties *mem; |
1173 | struct kfd_local_mem_info local_mem_info; |
1174 | |
1175 | if (!dev) |
1176 | return; |
1177 | |
1178 | /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with |
1179 | * single bank of VRAM local memory. |
1180 | * for dGPUs - VCRAT reports only one bank of Local Memory |
1181 | * for APUs - If CRAT from ACPI reports more than one bank, then |
1182 | * all the banks will report the same mem_clk_max information |
1183 | */ |
1184 | amdgpu_amdkfd_get_local_mem_info(adev: dev->gpu->adev, mem_info: &local_mem_info, |
1185 | xcp: dev->gpu->xcp); |
1186 | |
1187 | list_for_each_entry(mem, &dev->mem_props, list) |
1188 | mem->mem_clk_max = local_mem_info.mem_clk_max; |
1189 | } |
1190 | |
1191 | static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, |
1192 | struct kfd_topology_device *target_gpu_dev, |
1193 | struct kfd_iolink_properties *link) |
1194 | { |
1195 | /* xgmi always supports atomics between links. */ |
1196 | if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) |
1197 | return; |
1198 | |
1199 | /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */ |
1200 | if (target_gpu_dev) { |
1201 | uint32_t cap; |
1202 | |
1203 | pcie_capability_read_dword(dev: target_gpu_dev->gpu->adev->pdev, |
1204 | PCI_EXP_DEVCAP2, val: &cap); |
1205 | |
1206 | if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | |
1207 | PCI_EXP_DEVCAP2_ATOMIC_COMP64))) |
1208 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
1209 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1210 | /* set gpu (dev) flags. */ |
1211 | } else { |
1212 | if (!dev->gpu->kfd->pci_atomic_requested || |
1213 | dev->gpu->adev->asic_type == CHIP_HAWAII) |
1214 | link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
1215 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1216 | } |
1217 | } |
1218 | |
1219 | static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, |
1220 | struct kfd_iolink_properties *outbound_link, |
1221 | struct kfd_iolink_properties *inbound_link) |
1222 | { |
1223 | /* CPU -> GPU with PCIe */ |
1224 | if (!to_dev->gpu && |
1225 | inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) |
1226 | inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; |
1227 | |
1228 | if (to_dev->gpu) { |
1229 | /* GPU <-> GPU with PCIe and |
1230 | * Vega20 with XGMI |
1231 | */ |
1232 | if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || |
1233 | (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && |
1234 | KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { |
1235 | outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; |
1236 | inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; |
1237 | } |
1238 | } |
1239 | } |
1240 | |
1241 | static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) |
1242 | { |
1243 | struct kfd_iolink_properties *link, *inbound_link; |
1244 | struct kfd_topology_device *peer_dev; |
1245 | |
1246 | if (!dev || !dev->gpu) |
1247 | return; |
1248 | |
1249 | /* GPU only creates direct links so apply flags setting to all */ |
1250 | list_for_each_entry(link, &dev->io_link_props, list) { |
1251 | link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1252 | kfd_set_iolink_no_atomics(dev, NULL, link); |
1253 | peer_dev = kfd_topology_device_by_proximity_domain( |
1254 | proximity_domain: link->node_to); |
1255 | |
1256 | if (!peer_dev) |
1257 | continue; |
1258 | |
1259 | /* Include the CPU peer in GPU hive if connected over xGMI. */ |
1260 | if (!peer_dev->gpu && |
1261 | link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { |
1262 | /* |
1263 | * If the GPU is not part of a GPU hive, use its pci |
1264 | * device location as the hive ID to bind with the CPU. |
1265 | */ |
1266 | if (!dev->node_props.hive_id) |
1267 | dev->node_props.hive_id = pci_dev_id(dev: dev->gpu->adev->pdev); |
1268 | peer_dev->node_props.hive_id = dev->node_props.hive_id; |
1269 | } |
1270 | |
1271 | list_for_each_entry(inbound_link, &peer_dev->io_link_props, |
1272 | list) { |
1273 | if (inbound_link->node_to != link->node_from) |
1274 | continue; |
1275 | |
1276 | inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1277 | kfd_set_iolink_no_atomics(dev: peer_dev, target_gpu_dev: dev, link: inbound_link); |
1278 | kfd_set_iolink_non_coherent(to_dev: peer_dev, outbound_link: link, inbound_link); |
1279 | } |
1280 | } |
1281 | |
1282 | /* Create indirect links so apply flags setting to all */ |
1283 | list_for_each_entry(link, &dev->p2p_link_props, list) { |
1284 | link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1285 | kfd_set_iolink_no_atomics(dev, NULL, link); |
1286 | peer_dev = kfd_topology_device_by_proximity_domain( |
1287 | proximity_domain: link->node_to); |
1288 | |
1289 | if (!peer_dev) |
1290 | continue; |
1291 | |
1292 | list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, |
1293 | list) { |
1294 | if (inbound_link->node_to != link->node_from) |
1295 | continue; |
1296 | |
1297 | inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; |
1298 | kfd_set_iolink_no_atomics(dev: peer_dev, target_gpu_dev: dev, link: inbound_link); |
1299 | kfd_set_iolink_non_coherent(to_dev: peer_dev, outbound_link: link, inbound_link); |
1300 | } |
1301 | } |
1302 | } |
1303 | |
1304 | static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, |
1305 | struct kfd_iolink_properties *p2plink) |
1306 | { |
1307 | int ret; |
1308 | |
1309 | p2plink->kobj = kzalloc(size: sizeof(struct kobject), GFP_KERNEL); |
1310 | if (!p2plink->kobj) |
1311 | return -ENOMEM; |
1312 | |
1313 | ret = kobject_init_and_add(kobj: p2plink->kobj, ktype: &iolink_type, |
1314 | parent: dev->kobj_p2plink, fmt: "%d" , dev->node_props.p2p_links_count - 1); |
1315 | if (ret < 0) { |
1316 | kobject_put(kobj: p2plink->kobj); |
1317 | return ret; |
1318 | } |
1319 | |
1320 | p2plink->attr.name = "properties" ; |
1321 | p2plink->attr.mode = KFD_SYSFS_FILE_MODE; |
1322 | sysfs_attr_init(&p2plink->attr); |
1323 | ret = sysfs_create_file(kobj: p2plink->kobj, attr: &p2plink->attr); |
1324 | if (ret < 0) |
1325 | return ret; |
1326 | |
1327 | return 0; |
1328 | } |
1329 | |
1330 | static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) |
1331 | { |
1332 | struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; |
1333 | struct kfd_iolink_properties *props = NULL, *props2 = NULL; |
1334 | struct kfd_topology_device *cpu_dev; |
1335 | int ret = 0; |
1336 | int i, num_cpu; |
1337 | |
1338 | num_cpu = 0; |
1339 | list_for_each_entry(cpu_dev, &topology_device_list, list) { |
1340 | if (cpu_dev->gpu) |
1341 | break; |
1342 | num_cpu++; |
1343 | } |
1344 | |
1345 | gpu_link = list_first_entry(&kdev->io_link_props, |
1346 | struct kfd_iolink_properties, list); |
1347 | if (!gpu_link) |
1348 | return -ENOMEM; |
1349 | |
1350 | for (i = 0; i < num_cpu; i++) { |
1351 | /* CPU <--> GPU */ |
1352 | if (gpu_link->node_to == i) |
1353 | continue; |
1354 | |
1355 | /* find CPU <--> CPU links */ |
1356 | cpu_link = NULL; |
1357 | cpu_dev = kfd_topology_device_by_proximity_domain(proximity_domain: i); |
1358 | if (cpu_dev) { |
1359 | list_for_each_entry(tmp_link, |
1360 | &cpu_dev->io_link_props, list) { |
1361 | if (tmp_link->node_to == gpu_link->node_to) { |
1362 | cpu_link = tmp_link; |
1363 | break; |
1364 | } |
1365 | } |
1366 | } |
1367 | |
1368 | if (!cpu_link) |
1369 | return -ENOMEM; |
1370 | |
1371 | /* CPU <--> CPU <--> GPU, GPU node*/ |
1372 | props = kfd_alloc_struct(props); |
1373 | if (!props) |
1374 | return -ENOMEM; |
1375 | |
1376 | memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties)); |
1377 | props->weight = gpu_link->weight + cpu_link->weight; |
1378 | props->min_latency = gpu_link->min_latency + cpu_link->min_latency; |
1379 | props->max_latency = gpu_link->max_latency + cpu_link->max_latency; |
1380 | props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); |
1381 | props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); |
1382 | |
1383 | props->node_from = gpu_node; |
1384 | props->node_to = i; |
1385 | kdev->node_props.p2p_links_count++; |
1386 | list_add_tail(new: &props->list, head: &kdev->p2p_link_props); |
1387 | ret = kfd_build_p2p_node_entry(dev: kdev, p2plink: props); |
1388 | if (ret < 0) |
1389 | return ret; |
1390 | |
1391 | /* for small Bar, no CPU --> GPU in-direct links */ |
1392 | if (kfd_dev_is_large_bar(dev: kdev->gpu)) { |
1393 | /* CPU <--> CPU <--> GPU, CPU node*/ |
1394 | props2 = kfd_alloc_struct(props2); |
1395 | if (!props2) |
1396 | return -ENOMEM; |
1397 | |
1398 | memcpy(props2, props, sizeof(struct kfd_iolink_properties)); |
1399 | props2->node_from = i; |
1400 | props2->node_to = gpu_node; |
1401 | props2->kobj = NULL; |
1402 | cpu_dev->node_props.p2p_links_count++; |
1403 | list_add_tail(new: &props2->list, head: &cpu_dev->p2p_link_props); |
1404 | ret = kfd_build_p2p_node_entry(dev: cpu_dev, p2plink: props2); |
1405 | if (ret < 0) |
1406 | return ret; |
1407 | } |
1408 | } |
1409 | return ret; |
1410 | } |
1411 | |
1412 | #if defined(CONFIG_HSA_AMD_P2P) |
1413 | static int kfd_add_peer_prop(struct kfd_topology_device *kdev, |
1414 | struct kfd_topology_device *peer, int from, int to) |
1415 | { |
1416 | struct kfd_iolink_properties *props = NULL; |
1417 | struct kfd_iolink_properties *iolink1, *iolink2, *iolink3; |
1418 | struct kfd_topology_device *cpu_dev; |
1419 | int ret = 0; |
1420 | |
1421 | if (!amdgpu_device_is_peer_accessible( |
1422 | adev: kdev->gpu->adev, |
1423 | peer_adev: peer->gpu->adev)) |
1424 | return ret; |
1425 | |
1426 | iolink1 = list_first_entry(&kdev->io_link_props, |
1427 | struct kfd_iolink_properties, list); |
1428 | if (!iolink1) |
1429 | return -ENOMEM; |
1430 | |
1431 | iolink2 = list_first_entry(&peer->io_link_props, |
1432 | struct kfd_iolink_properties, list); |
1433 | if (!iolink2) |
1434 | return -ENOMEM; |
1435 | |
1436 | props = kfd_alloc_struct(props); |
1437 | if (!props) |
1438 | return -ENOMEM; |
1439 | |
1440 | memcpy(props, iolink1, sizeof(struct kfd_iolink_properties)); |
1441 | |
1442 | props->weight = iolink1->weight + iolink2->weight; |
1443 | props->min_latency = iolink1->min_latency + iolink2->min_latency; |
1444 | props->max_latency = iolink1->max_latency + iolink2->max_latency; |
1445 | props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); |
1446 | props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth); |
1447 | |
1448 | if (iolink1->node_to != iolink2->node_to) { |
1449 | /* CPU->CPU link*/ |
1450 | cpu_dev = kfd_topology_device_by_proximity_domain(proximity_domain: iolink1->node_to); |
1451 | if (cpu_dev) { |
1452 | list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) |
1453 | if (iolink3->node_to == iolink2->node_to) |
1454 | break; |
1455 | |
1456 | props->weight += iolink3->weight; |
1457 | props->min_latency += iolink3->min_latency; |
1458 | props->max_latency += iolink3->max_latency; |
1459 | props->min_bandwidth = min(props->min_bandwidth, |
1460 | iolink3->min_bandwidth); |
1461 | props->max_bandwidth = min(props->max_bandwidth, |
1462 | iolink3->max_bandwidth); |
1463 | } else { |
1464 | WARN(1, "CPU node not found" ); |
1465 | } |
1466 | } |
1467 | |
1468 | props->node_from = from; |
1469 | props->node_to = to; |
1470 | peer->node_props.p2p_links_count++; |
1471 | list_add_tail(new: &props->list, head: &peer->p2p_link_props); |
1472 | ret = kfd_build_p2p_node_entry(dev: peer, p2plink: props); |
1473 | |
1474 | return ret; |
1475 | } |
1476 | #endif |
1477 | |
1478 | static int kfd_dev_create_p2p_links(void) |
1479 | { |
1480 | struct kfd_topology_device *dev; |
1481 | struct kfd_topology_device *new_dev; |
1482 | #if defined(CONFIG_HSA_AMD_P2P) |
1483 | uint32_t i; |
1484 | #endif |
1485 | uint32_t k; |
1486 | int ret = 0; |
1487 | |
1488 | k = 0; |
1489 | list_for_each_entry(dev, &topology_device_list, list) |
1490 | k++; |
1491 | if (k < 2) |
1492 | return 0; |
1493 | |
1494 | new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); |
1495 | if (WARN_ON(!new_dev->gpu)) |
1496 | return 0; |
1497 | |
1498 | k--; |
1499 | |
1500 | /* create in-direct links */ |
1501 | ret = kfd_create_indirect_link_prop(kdev: new_dev, gpu_node: k); |
1502 | if (ret < 0) |
1503 | goto out; |
1504 | |
1505 | /* create p2p links */ |
1506 | #if defined(CONFIG_HSA_AMD_P2P) |
1507 | i = 0; |
1508 | list_for_each_entry(dev, &topology_device_list, list) { |
1509 | if (dev == new_dev) |
1510 | break; |
1511 | if (!dev->gpu || !dev->gpu->adev || |
1512 | (dev->gpu->kfd->hive_id && |
1513 | dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) |
1514 | goto next; |
1515 | |
1516 | /* check if node(s) is/are peer accessible in one direction or bi-direction */ |
1517 | ret = kfd_add_peer_prop(kdev: new_dev, peer: dev, from: i, to: k); |
1518 | if (ret < 0) |
1519 | goto out; |
1520 | |
1521 | ret = kfd_add_peer_prop(kdev: dev, peer: new_dev, from: k, to: i); |
1522 | if (ret < 0) |
1523 | goto out; |
1524 | next: |
1525 | i++; |
1526 | } |
1527 | #endif |
1528 | |
1529 | out: |
1530 | return ret; |
1531 | } |
1532 | |
1533 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ |
1534 | static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, |
1535 | struct kfd_gpu_cache_info *pcache_info, |
1536 | int cu_bitmask, |
1537 | int cache_type, unsigned int cu_processor_id, |
1538 | int cu_block) |
1539 | { |
1540 | unsigned int cu_sibling_map_mask; |
1541 | int first_active_cu; |
1542 | struct kfd_cache_properties *pcache = NULL; |
1543 | |
1544 | cu_sibling_map_mask = cu_bitmask; |
1545 | cu_sibling_map_mask >>= cu_block; |
1546 | cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); |
1547 | first_active_cu = ffs(cu_sibling_map_mask); |
1548 | |
1549 | /* CU could be inactive. In case of shared cache find the first active |
1550 | * CU. and incase of non-shared cache check if the CU is inactive. If |
1551 | * inactive active skip it |
1552 | */ |
1553 | if (first_active_cu) { |
1554 | pcache = kfd_alloc_struct(pcache); |
1555 | if (!pcache) |
1556 | return -ENOMEM; |
1557 | |
1558 | memset(pcache, 0, sizeof(struct kfd_cache_properties)); |
1559 | pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); |
1560 | pcache->cache_level = pcache_info[cache_type].cache_level; |
1561 | pcache->cache_size = pcache_info[cache_type].cache_size; |
1562 | |
1563 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) |
1564 | pcache->cache_type |= HSA_CACHE_TYPE_DATA; |
1565 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) |
1566 | pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; |
1567 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) |
1568 | pcache->cache_type |= HSA_CACHE_TYPE_CPU; |
1569 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) |
1570 | pcache->cache_type |= HSA_CACHE_TYPE_HSACU; |
1571 | |
1572 | /* Sibling map is w.r.t processor_id_low, so shift out |
1573 | * inactive CU |
1574 | */ |
1575 | cu_sibling_map_mask = |
1576 | cu_sibling_map_mask >> (first_active_cu - 1); |
1577 | |
1578 | pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); |
1579 | pcache->sibling_map[1] = |
1580 | (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); |
1581 | pcache->sibling_map[2] = |
1582 | (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); |
1583 | pcache->sibling_map[3] = |
1584 | (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); |
1585 | |
1586 | pcache->sibling_map_size = 4; |
1587 | *props_ext = pcache; |
1588 | |
1589 | return 0; |
1590 | } |
1591 | return 1; |
1592 | } |
1593 | |
1594 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ |
1595 | static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, |
1596 | struct kfd_gpu_cache_info *pcache_info, |
1597 | struct amdgpu_cu_info *cu_info, |
1598 | struct amdgpu_gfx_config *gfx_info, |
1599 | int cache_type, unsigned int cu_processor_id, |
1600 | struct kfd_node *knode) |
1601 | { |
1602 | unsigned int cu_sibling_map_mask; |
1603 | int first_active_cu; |
1604 | int i, j, k, xcc, start, end; |
1605 | int num_xcc = NUM_XCC(knode->xcc_mask); |
1606 | struct kfd_cache_properties *pcache = NULL; |
1607 | enum amdgpu_memory_partition mode; |
1608 | struct amdgpu_device *adev = knode->adev; |
1609 | |
1610 | start = ffs(knode->xcc_mask) - 1; |
1611 | end = start + num_xcc; |
1612 | cu_sibling_map_mask = cu_info->bitmap[start][0][0]; |
1613 | cu_sibling_map_mask &= |
1614 | ((1 << pcache_info[cache_type].num_cu_shared) - 1); |
1615 | first_active_cu = ffs(cu_sibling_map_mask); |
1616 | |
1617 | /* CU could be inactive. In case of shared cache find the first active |
1618 | * CU. and incase of non-shared cache check if the CU is inactive. If |
1619 | * inactive active skip it |
1620 | */ |
1621 | if (first_active_cu) { |
1622 | pcache = kfd_alloc_struct(pcache); |
1623 | if (!pcache) |
1624 | return -ENOMEM; |
1625 | |
1626 | memset(pcache, 0, sizeof(struct kfd_cache_properties)); |
1627 | pcache->processor_id_low = cu_processor_id |
1628 | + (first_active_cu - 1); |
1629 | pcache->cache_level = pcache_info[cache_type].cache_level; |
1630 | |
1631 | if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3)) |
1632 | mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); |
1633 | else |
1634 | mode = UNKNOWN_MEMORY_PARTITION_MODE; |
1635 | |
1636 | if (pcache->cache_level == 2) |
1637 | pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; |
1638 | else if (mode) |
1639 | pcache->cache_size = pcache_info[cache_type].cache_size / mode; |
1640 | else |
1641 | pcache->cache_size = pcache_info[cache_type].cache_size; |
1642 | |
1643 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) |
1644 | pcache->cache_type |= HSA_CACHE_TYPE_DATA; |
1645 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) |
1646 | pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; |
1647 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) |
1648 | pcache->cache_type |= HSA_CACHE_TYPE_CPU; |
1649 | if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) |
1650 | pcache->cache_type |= HSA_CACHE_TYPE_HSACU; |
1651 | |
1652 | /* Sibling map is w.r.t processor_id_low, so shift out |
1653 | * inactive CU |
1654 | */ |
1655 | cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); |
1656 | k = 0; |
1657 | |
1658 | for (xcc = start; xcc < end; xcc++) { |
1659 | for (i = 0; i < gfx_info->max_shader_engines; i++) { |
1660 | for (j = 0; j < gfx_info->max_sh_per_se; j++) { |
1661 | pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); |
1662 | pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); |
1663 | pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); |
1664 | pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); |
1665 | k += 4; |
1666 | |
1667 | cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4]; |
1668 | cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); |
1669 | } |
1670 | } |
1671 | } |
1672 | pcache->sibling_map_size = k; |
1673 | *props_ext = pcache; |
1674 | return 0; |
1675 | } |
1676 | return 1; |
1677 | } |
1678 | |
1679 | #define KFD_MAX_CACHE_TYPES 6 |
1680 | |
1681 | /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info |
1682 | * tables |
1683 | */ |
1684 | static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) |
1685 | { |
1686 | struct kfd_gpu_cache_info *pcache_info = NULL; |
1687 | int i, j, k, xcc, start, end; |
1688 | int ct = 0; |
1689 | unsigned int cu_processor_id; |
1690 | int ret; |
1691 | unsigned int num_cu_shared; |
1692 | struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; |
1693 | struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; |
1694 | int gpu_processor_id; |
1695 | struct kfd_cache_properties *props_ext; |
1696 | int num_of_entries = 0; |
1697 | int num_of_cache_types = 0; |
1698 | struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; |
1699 | |
1700 | |
1701 | gpu_processor_id = dev->node_props.simd_id_base; |
1702 | |
1703 | pcache_info = cache_info; |
1704 | num_of_cache_types = kfd_get_gpu_cache_info(kdev, pcache_info: &pcache_info); |
1705 | if (!num_of_cache_types) { |
1706 | pr_warn("no cache info found\n" ); |
1707 | return; |
1708 | } |
1709 | |
1710 | /* For each type of cache listed in the kfd_gpu_cache_info table, |
1711 | * go through all available Compute Units. |
1712 | * The [i,j,k] loop will |
1713 | * if kfd_gpu_cache_info.num_cu_shared = 1 |
1714 | * will parse through all available CU |
1715 | * If (kfd_gpu_cache_info.num_cu_shared != 1) |
1716 | * then it will consider only one CU from |
1717 | * the shared unit |
1718 | */ |
1719 | start = ffs(kdev->xcc_mask) - 1; |
1720 | end = start + NUM_XCC(kdev->xcc_mask); |
1721 | |
1722 | for (ct = 0; ct < num_of_cache_types; ct++) { |
1723 | cu_processor_id = gpu_processor_id; |
1724 | if (pcache_info[ct].cache_level == 1) { |
1725 | for (xcc = start; xcc < end; xcc++) { |
1726 | for (i = 0; i < gfx_info->max_shader_engines; i++) { |
1727 | for (j = 0; j < gfx_info->max_sh_per_se; j++) { |
1728 | for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) { |
1729 | |
1730 | ret = fill_in_l1_pcache(props_ext: &props_ext, pcache_info, |
1731 | cu_bitmask: cu_info->bitmap[xcc][i % 4][j + i / 4], cache_type: ct, |
1732 | cu_processor_id, cu_block: k); |
1733 | |
1734 | if (ret < 0) |
1735 | break; |
1736 | |
1737 | if (!ret) { |
1738 | num_of_entries++; |
1739 | list_add_tail(new: &props_ext->list, head: &dev->cache_props); |
1740 | } |
1741 | |
1742 | /* Move to next CU block */ |
1743 | num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= |
1744 | gfx_info->max_cu_per_sh) ? |
1745 | pcache_info[ct].num_cu_shared : |
1746 | (gfx_info->max_cu_per_sh - k); |
1747 | cu_processor_id += num_cu_shared; |
1748 | } |
1749 | } |
1750 | } |
1751 | } |
1752 | } else { |
1753 | ret = fill_in_l2_l3_pcache(props_ext: &props_ext, pcache_info, |
1754 | cu_info, gfx_info, cache_type: ct, cu_processor_id, knode: kdev); |
1755 | |
1756 | if (ret < 0) |
1757 | break; |
1758 | |
1759 | if (!ret) { |
1760 | num_of_entries++; |
1761 | list_add_tail(new: &props_ext->list, head: &dev->cache_props); |
1762 | } |
1763 | } |
1764 | } |
1765 | dev->node_props.caches_count += num_of_entries; |
1766 | pr_debug("Added [%d] GPU cache entries\n" , num_of_entries); |
1767 | } |
1768 | |
1769 | static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, |
1770 | struct kfd_topology_device **dev) |
1771 | { |
1772 | int proximity_domain = ++topology_crat_proximity_domain; |
1773 | struct list_head temp_topology_device_list; |
1774 | void *crat_image = NULL; |
1775 | size_t image_size = 0; |
1776 | int res; |
1777 | |
1778 | res = kfd_create_crat_image_virtual(crat_image: &crat_image, size: &image_size, |
1779 | COMPUTE_UNIT_GPU, kdev: gpu, |
1780 | proximity_domain); |
1781 | if (res) { |
1782 | pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n" , |
1783 | gpu_id); |
1784 | topology_crat_proximity_domain--; |
1785 | goto err; |
1786 | } |
1787 | |
1788 | INIT_LIST_HEAD(list: &temp_topology_device_list); |
1789 | |
1790 | res = kfd_parse_crat_table(crat_image, |
1791 | device_list: &temp_topology_device_list, |
1792 | proximity_domain); |
1793 | if (res) { |
1794 | pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n" , |
1795 | gpu_id); |
1796 | topology_crat_proximity_domain--; |
1797 | goto err; |
1798 | } |
1799 | |
1800 | kfd_topology_update_device_list(temp_list: &temp_topology_device_list, |
1801 | master_list: &topology_device_list); |
1802 | |
1803 | *dev = kfd_assign_gpu(gpu); |
1804 | if (WARN_ON(!*dev)) { |
1805 | res = -ENODEV; |
1806 | goto err; |
1807 | } |
1808 | |
1809 | /* Fill the cache affinity information here for the GPUs |
1810 | * using VCRAT |
1811 | */ |
1812 | kfd_fill_cache_non_crat_info(dev: *dev, kdev: gpu); |
1813 | |
1814 | /* Update the SYSFS tree, since we added another topology |
1815 | * device |
1816 | */ |
1817 | res = kfd_topology_update_sysfs(); |
1818 | if (!res) |
1819 | sys_props.generation_count++; |
1820 | else |
1821 | pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n" , |
1822 | gpu_id, res); |
1823 | |
1824 | err: |
1825 | kfd_destroy_crat_image(crat_image); |
1826 | return res; |
1827 | } |
1828 | |
1829 | static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev) |
1830 | { |
1831 | bool firmware_supported = true; |
1832 | |
1833 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && |
1834 | KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { |
1835 | uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & |
1836 | AMDGPU_MES_API_VERSION_MASK) >> |
1837 | AMDGPU_MES_API_VERSION_SHIFT; |
1838 | uint32_t mes_rev = dev->gpu->adev->mes.sched_version & |
1839 | AMDGPU_MES_VERSION_MASK; |
1840 | |
1841 | firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64); |
1842 | goto out; |
1843 | } |
1844 | |
1845 | /* |
1846 | * Note: Any unlisted devices here are assumed to support exception handling. |
1847 | * Add additional checks here as needed. |
1848 | */ |
1849 | switch (KFD_GC_VERSION(dev->gpu)) { |
1850 | case IP_VERSION(9, 0, 1): |
1851 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; |
1852 | break; |
1853 | case IP_VERSION(9, 1, 0): |
1854 | case IP_VERSION(9, 2, 1): |
1855 | case IP_VERSION(9, 2, 2): |
1856 | case IP_VERSION(9, 3, 0): |
1857 | case IP_VERSION(9, 4, 0): |
1858 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; |
1859 | break; |
1860 | case IP_VERSION(9, 4, 1): |
1861 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; |
1862 | break; |
1863 | case IP_VERSION(9, 4, 2): |
1864 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; |
1865 | break; |
1866 | case IP_VERSION(10, 1, 10): |
1867 | case IP_VERSION(10, 1, 2): |
1868 | case IP_VERSION(10, 1, 1): |
1869 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; |
1870 | break; |
1871 | case IP_VERSION(10, 3, 0): |
1872 | case IP_VERSION(10, 3, 2): |
1873 | case IP_VERSION(10, 3, 1): |
1874 | case IP_VERSION(10, 3, 4): |
1875 | case IP_VERSION(10, 3, 5): |
1876 | firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; |
1877 | break; |
1878 | case IP_VERSION(10, 1, 3): |
1879 | case IP_VERSION(10, 3, 3): |
1880 | firmware_supported = false; |
1881 | break; |
1882 | default: |
1883 | break; |
1884 | } |
1885 | |
1886 | out: |
1887 | if (firmware_supported) |
1888 | dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; |
1889 | } |
1890 | |
1891 | static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) |
1892 | { |
1893 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << |
1894 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
1895 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
1896 | |
1897 | dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | |
1898 | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | |
1899 | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; |
1900 | |
1901 | if (kfd_dbg_has_ttmps_always_setup(dev: dev->gpu)) |
1902 | dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; |
1903 | |
1904 | if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { |
1905 | if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) |
1906 | dev->node_props.debug_prop |= |
1907 | HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 | |
1908 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3; |
1909 | else |
1910 | dev->node_props.debug_prop |= |
1911 | HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | |
1912 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; |
1913 | |
1914 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) |
1915 | dev->node_props.capability |= |
1916 | HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; |
1917 | } else { |
1918 | dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | |
1919 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; |
1920 | |
1921 | if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) |
1922 | dev->node_props.capability |= |
1923 | HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; |
1924 | } |
1925 | |
1926 | kfd_topology_set_dbg_firmware_support(dev); |
1927 | } |
1928 | |
1929 | int kfd_topology_add_device(struct kfd_node *gpu) |
1930 | { |
1931 | uint32_t gpu_id; |
1932 | struct kfd_topology_device *dev; |
1933 | int res = 0; |
1934 | int i; |
1935 | const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; |
1936 | struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; |
1937 | struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; |
1938 | |
1939 | gpu_id = kfd_generate_gpu_id(gpu); |
1940 | if (gpu->xcp && !gpu->xcp->ddev) { |
1941 | dev_warn(gpu->adev->dev, |
1942 | "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned." , |
1943 | gpu_id); |
1944 | return 0; |
1945 | } else { |
1946 | pr_debug("Adding new GPU (ID: 0x%x) to topology\n" , gpu_id); |
1947 | } |
1948 | |
1949 | /* Check to see if this gpu device exists in the topology_device_list. |
1950 | * If so, assign the gpu to that device, |
1951 | * else create a Virtual CRAT for this gpu device and then parse that |
1952 | * CRAT to create a new topology device. Once created assign the gpu to |
1953 | * that topology device |
1954 | */ |
1955 | down_write(sem: &topology_lock); |
1956 | dev = kfd_assign_gpu(gpu); |
1957 | if (!dev) |
1958 | res = kfd_topology_add_device_locked(gpu, gpu_id, dev: &dev); |
1959 | up_write(sem: &topology_lock); |
1960 | if (res) |
1961 | return res; |
1962 | |
1963 | dev->gpu_id = gpu_id; |
1964 | gpu->id = gpu_id; |
1965 | |
1966 | kfd_dev_create_p2p_links(); |
1967 | |
1968 | /* TODO: Move the following lines to function |
1969 | * kfd_add_non_crat_information |
1970 | */ |
1971 | |
1972 | /* Fill-in additional information that is not available in CRAT but |
1973 | * needed for the topology |
1974 | */ |
1975 | for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { |
1976 | dev->node_props.name[i] = __tolower(c: asic_name[i]); |
1977 | if (asic_name[i] == '\0') |
1978 | break; |
1979 | } |
1980 | dev->node_props.name[i] = '\0'; |
1981 | |
1982 | dev->node_props.simd_arrays_per_engine = |
1983 | gfx_info->max_sh_per_se; |
1984 | |
1985 | dev->node_props.gfx_target_version = |
1986 | gpu->kfd->device_info.gfx_target_version; |
1987 | dev->node_props.vendor_id = gpu->adev->pdev->vendor; |
1988 | dev->node_props.device_id = gpu->adev->pdev->device; |
1989 | dev->node_props.capability |= |
1990 | ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & |
1991 | HSA_CAP_ASIC_REVISION_MASK); |
1992 | |
1993 | dev->node_props.location_id = pci_dev_id(dev: gpu->adev->pdev); |
1994 | if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) |
1995 | dev->node_props.location_id |= dev->gpu->node_id; |
1996 | |
1997 | dev->node_props.domain = pci_domain_nr(bus: gpu->adev->pdev->bus); |
1998 | dev->node_props.max_engine_clk_fcompute = |
1999 | amdgpu_amdkfd_get_max_engine_clock_in_mhz(adev: dev->gpu->adev); |
2000 | dev->node_props.max_engine_clk_ccompute = |
2001 | cpufreq_quick_get_max(cpu: 0) / 1000; |
2002 | |
2003 | if (gpu->xcp) |
2004 | dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; |
2005 | else |
2006 | dev->node_props.drm_render_minor = |
2007 | gpu->kfd->shared_resources.drm_render_minor; |
2008 | |
2009 | dev->node_props.hive_id = gpu->kfd->hive_id; |
2010 | dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(kdev: gpu); |
2011 | dev->node_props.num_sdma_xgmi_engines = |
2012 | kfd_get_num_xgmi_sdma_engines(kdev: gpu); |
2013 | dev->node_props.num_sdma_queues_per_engine = |
2014 | gpu->kfd->device_info.num_sdma_queues_per_engine - |
2015 | gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; |
2016 | dev->node_props.num_gws = (dev->gpu->gws && |
2017 | dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? |
2018 | dev->gpu->adev->gds.gws_size : 0; |
2019 | dev->node_props.num_cp_queues = get_cp_queues_num(dqm: dev->gpu->dqm); |
2020 | |
2021 | kfd_fill_mem_clk_max_info(dev); |
2022 | kfd_fill_iolink_non_crat_info(dev); |
2023 | |
2024 | switch (dev->gpu->adev->asic_type) { |
2025 | case CHIP_KAVERI: |
2026 | case CHIP_HAWAII: |
2027 | case CHIP_TONGA: |
2028 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << |
2029 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
2030 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
2031 | break; |
2032 | case CHIP_CARRIZO: |
2033 | case CHIP_FIJI: |
2034 | case CHIP_POLARIS10: |
2035 | case CHIP_POLARIS11: |
2036 | case CHIP_POLARIS12: |
2037 | case CHIP_VEGAM: |
2038 | pr_debug("Adding doorbell packet type capability\n" ); |
2039 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << |
2040 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
2041 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
2042 | break; |
2043 | default: |
2044 | if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) |
2045 | WARN(1, "Unexpected ASIC family %u" , |
2046 | dev->gpu->adev->asic_type); |
2047 | else |
2048 | kfd_topology_set_capabilities(dev); |
2049 | } |
2050 | |
2051 | /* |
2052 | * Overwrite ATS capability according to needs_iommu_device to fix |
2053 | * potential missing corresponding bit in CRAT of BIOS. |
2054 | */ |
2055 | dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; |
2056 | |
2057 | /* Fix errors in CZ CRAT. |
2058 | * simd_count: Carrizo CRAT reports wrong simd_count, probably |
2059 | * because it doesn't consider masked out CUs |
2060 | * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd |
2061 | */ |
2062 | if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { |
2063 | dev->node_props.simd_count = |
2064 | cu_info->simd_per_cu * cu_info->number; |
2065 | dev->node_props.max_waves_per_simd = 10; |
2066 | } |
2067 | |
2068 | /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ |
2069 | dev->node_props.capability |= |
2070 | ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? |
2071 | HSA_CAP_SRAM_EDCSUPPORTED : 0; |
2072 | dev->node_props.capability |= |
2073 | ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? |
2074 | HSA_CAP_MEM_EDCSUPPORTED : 0; |
2075 | |
2076 | if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) |
2077 | dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? |
2078 | HSA_CAP_RASEVENTNOTIFY : 0; |
2079 | |
2080 | if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) |
2081 | dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; |
2082 | |
2083 | if (dev->gpu->adev->gmc.is_app_apu || |
2084 | dev->gpu->adev->gmc.xgmi.connected_to_cpu) |
2085 | dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; |
2086 | |
2087 | kfd_debug_print_topology(); |
2088 | |
2089 | kfd_notify_gpu_change(gpu_id, arrival: 1); |
2090 | |
2091 | return 0; |
2092 | } |
2093 | |
2094 | /** |
2095 | * kfd_topology_update_io_links() - Update IO links after device removal. |
2096 | * @proximity_domain: Proximity domain value of the dev being removed. |
2097 | * |
2098 | * The topology list currently is arranged in increasing order of |
2099 | * proximity domain. |
2100 | * |
2101 | * Two things need to be done when a device is removed: |
2102 | * 1. All the IO links to this device need to be removed. |
2103 | * 2. All nodes after the current device node need to move |
2104 | * up once this device node is removed from the topology |
2105 | * list. As a result, the proximity domain values for |
2106 | * all nodes after the node being deleted reduce by 1. |
2107 | * This would also cause the proximity domain values for |
2108 | * io links to be updated based on new proximity domain |
2109 | * values. |
2110 | * |
2111 | * Context: The caller must hold write topology_lock. |
2112 | */ |
2113 | static void kfd_topology_update_io_links(int proximity_domain) |
2114 | { |
2115 | struct kfd_topology_device *dev; |
2116 | struct kfd_iolink_properties *iolink, *p2plink, *tmp; |
2117 | |
2118 | list_for_each_entry(dev, &topology_device_list, list) { |
2119 | if (dev->proximity_domain > proximity_domain) |
2120 | dev->proximity_domain--; |
2121 | |
2122 | list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { |
2123 | /* |
2124 | * If there is an io link to the dev being deleted |
2125 | * then remove that IO link also. |
2126 | */ |
2127 | if (iolink->node_to == proximity_domain) { |
2128 | list_del(entry: &iolink->list); |
2129 | dev->node_props.io_links_count--; |
2130 | } else { |
2131 | if (iolink->node_from > proximity_domain) |
2132 | iolink->node_from--; |
2133 | if (iolink->node_to > proximity_domain) |
2134 | iolink->node_to--; |
2135 | } |
2136 | } |
2137 | |
2138 | list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { |
2139 | /* |
2140 | * If there is a p2p link to the dev being deleted |
2141 | * then remove that p2p link also. |
2142 | */ |
2143 | if (p2plink->node_to == proximity_domain) { |
2144 | list_del(entry: &p2plink->list); |
2145 | dev->node_props.p2p_links_count--; |
2146 | } else { |
2147 | if (p2plink->node_from > proximity_domain) |
2148 | p2plink->node_from--; |
2149 | if (p2plink->node_to > proximity_domain) |
2150 | p2plink->node_to--; |
2151 | } |
2152 | } |
2153 | } |
2154 | } |
2155 | |
2156 | int kfd_topology_remove_device(struct kfd_node *gpu) |
2157 | { |
2158 | struct kfd_topology_device *dev, *tmp; |
2159 | uint32_t gpu_id; |
2160 | int res = -ENODEV; |
2161 | int i = 0; |
2162 | |
2163 | down_write(sem: &topology_lock); |
2164 | |
2165 | list_for_each_entry_safe(dev, tmp, &topology_device_list, list) { |
2166 | if (dev->gpu == gpu) { |
2167 | gpu_id = dev->gpu_id; |
2168 | kfd_remove_sysfs_node_entry(dev); |
2169 | kfd_release_topology_device(dev); |
2170 | sys_props.num_devices--; |
2171 | kfd_topology_update_io_links(proximity_domain: i); |
2172 | topology_crat_proximity_domain = sys_props.num_devices-1; |
2173 | sys_props.generation_count++; |
2174 | res = 0; |
2175 | if (kfd_topology_update_sysfs() < 0) |
2176 | kfd_topology_release_sysfs(); |
2177 | break; |
2178 | } |
2179 | i++; |
2180 | } |
2181 | |
2182 | up_write(sem: &topology_lock); |
2183 | |
2184 | if (!res) |
2185 | kfd_notify_gpu_change(gpu_id, arrival: 0); |
2186 | |
2187 | return res; |
2188 | } |
2189 | |
2190 | /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD |
2191 | * topology. If GPU device is found @idx, then valid kfd_dev pointer is |
2192 | * returned through @kdev |
2193 | * Return - 0: On success (@kdev will be NULL for non GPU nodes) |
2194 | * -1: If end of list |
2195 | */ |
2196 | int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) |
2197 | { |
2198 | |
2199 | struct kfd_topology_device *top_dev; |
2200 | uint8_t device_idx = 0; |
2201 | |
2202 | *kdev = NULL; |
2203 | down_read(sem: &topology_lock); |
2204 | |
2205 | list_for_each_entry(top_dev, &topology_device_list, list) { |
2206 | if (device_idx == idx) { |
2207 | *kdev = top_dev->gpu; |
2208 | up_read(sem: &topology_lock); |
2209 | return 0; |
2210 | } |
2211 | |
2212 | device_idx++; |
2213 | } |
2214 | |
2215 | up_read(sem: &topology_lock); |
2216 | |
2217 | return -1; |
2218 | |
2219 | } |
2220 | |
2221 | static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) |
2222 | { |
2223 | int first_cpu_of_numa_node; |
2224 | |
2225 | if (!cpumask || cpumask == cpu_none_mask) |
2226 | return -1; |
2227 | first_cpu_of_numa_node = cpumask_first(srcp: cpumask); |
2228 | if (first_cpu_of_numa_node >= nr_cpu_ids) |
2229 | return -1; |
2230 | #ifdef CONFIG_X86_64 |
2231 | return cpu_data(first_cpu_of_numa_node).topo.apicid; |
2232 | #else |
2233 | return first_cpu_of_numa_node; |
2234 | #endif |
2235 | } |
2236 | |
2237 | /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor |
2238 | * of the given NUMA node (numa_node_id) |
2239 | * Return -1 on failure |
2240 | */ |
2241 | int kfd_numa_node_to_apic_id(int numa_node_id) |
2242 | { |
2243 | if (numa_node_id == -1) { |
2244 | pr_warn("Invalid NUMA Node. Use online CPU mask\n" ); |
2245 | return kfd_cpumask_to_apic_id(cpu_online_mask); |
2246 | } |
2247 | return kfd_cpumask_to_apic_id(cpumask: cpumask_of_node(node: numa_node_id)); |
2248 | } |
2249 | |
2250 | #if defined(CONFIG_DEBUG_FS) |
2251 | |
2252 | int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) |
2253 | { |
2254 | struct kfd_topology_device *dev; |
2255 | unsigned int i = 0; |
2256 | int r = 0; |
2257 | |
2258 | down_read(sem: &topology_lock); |
2259 | |
2260 | list_for_each_entry(dev, &topology_device_list, list) { |
2261 | if (!dev->gpu) { |
2262 | i++; |
2263 | continue; |
2264 | } |
2265 | |
2266 | seq_printf(m, fmt: "Node %u, gpu_id %x:\n" , i++, dev->gpu->id); |
2267 | r = dqm_debugfs_hqds(m, data: dev->gpu->dqm); |
2268 | if (r) |
2269 | break; |
2270 | } |
2271 | |
2272 | up_read(sem: &topology_lock); |
2273 | |
2274 | return r; |
2275 | } |
2276 | |
2277 | int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) |
2278 | { |
2279 | struct kfd_topology_device *dev; |
2280 | unsigned int i = 0; |
2281 | int r = 0; |
2282 | |
2283 | down_read(sem: &topology_lock); |
2284 | |
2285 | list_for_each_entry(dev, &topology_device_list, list) { |
2286 | if (!dev->gpu) { |
2287 | i++; |
2288 | continue; |
2289 | } |
2290 | |
2291 | seq_printf(m, fmt: "Node %u, gpu_id %x:\n" , i++, dev->gpu->id); |
2292 | r = pm_debugfs_runlist(m, data: &dev->gpu->dqm->packet_mgr); |
2293 | if (r) |
2294 | break; |
2295 | } |
2296 | |
2297 | up_read(sem: &topology_lock); |
2298 | |
2299 | return r; |
2300 | } |
2301 | |
2302 | #endif |
2303 | |