1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | */ |
23 | #include "kfd_priv.h" |
24 | #include <linux/mm.h> |
25 | #include <linux/mman.h> |
26 | #include <linux/slab.h> |
27 | #include <linux/io.h> |
28 | #include <linux/idr.h> |
29 | |
30 | /* |
 * This extension supports kernel-level doorbell management for the
 * kernel queues, using the first doorbell page reserved for the kernel.
33 | */ |
34 | |
35 | /* |
36 | * Each device exposes a doorbell aperture, a PCI MMIO aperture that |
37 | * receives 32-bit writes that are passed to queues as wptr values. |
38 | * The doorbells are intended to be written by applications as part |
39 | * of queueing work on user-mode queues. |
40 | * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. |
41 | * We map the doorbell address space into user-mode when a process creates |
42 | * its first queue on each device. |
43 | * Although the mapping is done by KFD, it is equivalent to an mmap of |
44 | * the /dev/kfd with the particular device encoded in the mmap offset. |
45 | * There will be other uses for mmap of /dev/kfd, so only a range of |
46 | * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. |
47 | */ |
48 | |
49 | /* # of doorbell bytes allocated for each process. */ |
50 | size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) |
51 | { |
52 | if (!kfd->shared_resources.enable_mes) |
53 | return roundup(kfd->device_info.doorbell_size * |
54 | KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, |
55 | PAGE_SIZE); |
56 | else |
57 | return amdgpu_mes_doorbell_process_slice( |
58 | (struct amdgpu_device *)kfd->adev); |
59 | } |
60 | |
61 | /* Doorbell calculations for device init. */ |
62 | int kfd_doorbell_init(struct kfd_dev *kfd) |
63 | { |
64 | size_t doorbell_start_offset; |
65 | size_t doorbell_aperture_size; |
66 | size_t doorbell_process_limit; |
67 | |
68 | /* |
69 | * With MES enabled, just set the doorbell base as it is needed |
70 | * to calculate doorbell physical address. |
71 | */ |
72 | if (kfd->shared_resources.enable_mes) { |
73 | kfd->doorbell_base = |
74 | kfd->shared_resources.doorbell_physical_address; |
75 | return 0; |
76 | } |
77 | |
78 | /* |
79 | * We start with calculations in bytes because the input data might |
80 | * only be byte-aligned. |
81 | * Only after we have done the rounding can we assume any alignment. |
82 | */ |
83 | |
84 | doorbell_start_offset = |
85 | roundup(kfd->shared_resources.doorbell_start_offset, |
86 | kfd_doorbell_process_slice(kfd)); |
87 | |
88 | doorbell_aperture_size = |
89 | rounddown(kfd->shared_resources.doorbell_aperture_size, |
90 | kfd_doorbell_process_slice(kfd)); |
91 | |
92 | if (doorbell_aperture_size > doorbell_start_offset) |
93 | doorbell_process_limit = |
94 | (doorbell_aperture_size - doorbell_start_offset) / |
95 | kfd_doorbell_process_slice(kfd); |
96 | else |
97 | return -ENOSPC; |
98 | |
99 | if (!kfd->max_doorbell_slices || |
100 | doorbell_process_limit < kfd->max_doorbell_slices) |
101 | kfd->max_doorbell_slices = doorbell_process_limit; |
102 | |
103 | kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + |
104 | doorbell_start_offset; |
105 | |
106 | kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); |
107 | |
108 | kfd->doorbell_kernel_ptr = ioremap(offset: kfd->doorbell_base, |
109 | size: kfd_doorbell_process_slice(kfd)); |
110 | |
111 | if (!kfd->doorbell_kernel_ptr) |
112 | return -ENOMEM; |
113 | |
114 | pr_debug("Doorbell initialization:\n" ); |
115 | pr_debug("doorbell base == 0x%08lX\n" , |
116 | (uintptr_t)kfd->doorbell_base); |
117 | |
118 | pr_debug("doorbell_base_dw_offset == 0x%08lX\n" , |
119 | kfd->doorbell_base_dw_offset); |
120 | |
121 | pr_debug("doorbell_process_limit == 0x%08lX\n" , |
122 | doorbell_process_limit); |
123 | |
124 | pr_debug("doorbell_kernel_offset == 0x%08lX\n" , |
125 | (uintptr_t)kfd->doorbell_base); |
126 | |
127 | pr_debug("doorbell aperture size == 0x%08lX\n" , |
128 | kfd->shared_resources.doorbell_aperture_size); |
129 | |
130 | pr_debug("doorbell kernel address == %p\n" , kfd->doorbell_kernel_ptr); |
131 | |
132 | return 0; |
133 | } |
134 | |
135 | void kfd_doorbell_fini(struct kfd_dev *kfd) |
136 | { |
137 | if (kfd->doorbell_kernel_ptr) |
138 | iounmap(addr: kfd->doorbell_kernel_ptr); |
139 | } |
140 | |
141 | int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, |
142 | struct vm_area_struct *vma) |
143 | { |
144 | phys_addr_t address; |
145 | struct kfd_process_device *pdd; |
146 | |
147 | /* |
148 | * For simplicitly we only allow mapping of the entire doorbell |
149 | * allocation of a single device & process. |
150 | */ |
151 | if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) |
152 | return -EINVAL; |
153 | |
154 | pdd = kfd_get_process_device_data(dev, p: process); |
155 | if (!pdd) |
156 | return -EINVAL; |
157 | |
158 | /* Calculate physical address of doorbell */ |
159 | address = kfd_get_process_doorbells(pdd); |
160 | if (!address) |
161 | return -ENOMEM; |
162 | vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | |
163 | VM_DONTDUMP | VM_PFNMAP); |
164 | |
165 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); |
166 | |
167 | pr_debug("Mapping doorbell page\n" |
168 | " target user address == 0x%08llX\n" |
169 | " physical address == 0x%08llX\n" |
170 | " vm_flags == 0x%04lX\n" |
171 | " size == 0x%04lX\n" , |
172 | (unsigned long long) vma->vm_start, address, vma->vm_flags, |
173 | kfd_doorbell_process_slice(dev)); |
174 | |
175 | |
176 | return io_remap_pfn_range(vma, |
177 | addr: vma->vm_start, |
178 | pfn: address >> PAGE_SHIFT, |
179 | size: kfd_doorbell_process_slice(kfd: dev), |
180 | prot: vma->vm_page_prot); |
181 | } |
182 | |
183 | |
184 | /* get kernel iomem pointer for a doorbell */ |
185 | void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, |
186 | unsigned int *doorbell_off) |
187 | { |
188 | u32 inx; |
189 | |
190 | mutex_lock(lock: &kfd->doorbell_mutex); |
191 | inx = find_first_zero_bit(addr: kfd->doorbell_available_index, |
192 | KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); |
193 | |
194 | __set_bit(inx, kfd->doorbell_available_index); |
195 | mutex_unlock(lock: &kfd->doorbell_mutex); |
196 | |
197 | if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) |
198 | return NULL; |
199 | |
200 | inx *= kfd->device_info.doorbell_size / sizeof(u32); |
201 | |
202 | /* |
203 | * Calculating the kernel doorbell offset using the first |
204 | * doorbell page. |
205 | */ |
206 | *doorbell_off = kfd->doorbell_base_dw_offset + inx; |
207 | |
208 | pr_debug("Get kernel queue doorbell\n" |
209 | " doorbell offset == 0x%08X\n" |
210 | " doorbell index == 0x%x\n" , |
211 | *doorbell_off, inx); |
212 | |
213 | return kfd->doorbell_kernel_ptr + inx; |
214 | } |
215 | |
216 | void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) |
217 | { |
218 | unsigned int inx; |
219 | |
220 | inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) |
221 | * sizeof(u32) / kfd->device_info.doorbell_size; |
222 | |
223 | mutex_lock(lock: &kfd->doorbell_mutex); |
224 | __clear_bit(inx, kfd->doorbell_available_index); |
225 | mutex_unlock(lock: &kfd->doorbell_mutex); |
226 | } |
227 | |
228 | void write_kernel_doorbell(void __iomem *db, u32 value) |
229 | { |
230 | if (db) { |
231 | writel(val: value, addr: db); |
232 | pr_debug("Writing %d to doorbell address %p\n" , value, db); |
233 | } |
234 | } |
235 | |
236 | void write_kernel_doorbell64(void __iomem *db, u64 value) |
237 | { |
238 | if (db) { |
239 | WARN(((unsigned long)db & 7) != 0, |
240 | "Unaligned 64-bit doorbell" ); |
241 | writeq(value, (u64 __iomem *)db); |
242 | pr_debug("writing %llu to doorbell address %p\n" , value, db); |
243 | } |
244 | } |
245 | |
246 | unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, |
247 | struct kfd_process_device *pdd, |
248 | unsigned int doorbell_id) |
249 | { |
250 | /* |
251 | * doorbell_base_dw_offset accounts for doorbells taken by KGD. |
252 | * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to |
253 | * the process's doorbells. The offset returned is in dword |
254 | * units regardless of the ASIC-dependent doorbell size. |
255 | */ |
256 | if (!kfd->shared_resources.enable_mes) |
257 | return kfd->doorbell_base_dw_offset + |
258 | pdd->doorbell_index |
259 | * kfd_doorbell_process_slice(kfd) / sizeof(u32) + |
260 | doorbell_id * |
261 | kfd->device_info.doorbell_size / sizeof(u32); |
262 | else |
263 | return amdgpu_mes_get_doorbell_dw_offset_in_bar( |
264 | (struct amdgpu_device *)kfd->adev, |
265 | pdd->doorbell_index, doorbell_id); |
266 | } |
267 | |
268 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd) |
269 | { |
270 | uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - |
271 | kfd->shared_resources.doorbell_start_offset) / |
272 | kfd_doorbell_process_slice(kfd) + 1; |
273 | |
274 | return num_of_elems; |
275 | |
276 | } |
277 | |
278 | phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) |
279 | { |
280 | if (!pdd->doorbell_index) { |
281 | int r = kfd_alloc_process_doorbells(kfd: pdd->dev, |
282 | doorbell_index: &pdd->doorbell_index); |
283 | if (r < 0) |
284 | return 0; |
285 | } |
286 | |
287 | return pdd->dev->doorbell_base + |
288 | pdd->doorbell_index * kfd_doorbell_process_slice(kfd: pdd->dev); |
289 | } |
290 | |
291 | int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) |
292 | { |
293 | int r = 0; |
294 | |
295 | if (!kfd->shared_resources.enable_mes) |
296 | r = ida_simple_get(&kfd->doorbell_ida, 1, |
297 | kfd->max_doorbell_slices, GFP_KERNEL); |
298 | else |
299 | r = amdgpu_mes_alloc_process_doorbells( |
300 | (struct amdgpu_device *)kfd->adev, |
301 | doorbell_index); |
302 | |
303 | if (r > 0) |
304 | *doorbell_index = r; |
305 | |
306 | if (r < 0) |
307 | pr_err("Failed to allocate process doorbells\n" ); |
308 | |
309 | return r; |
310 | } |
311 | |
312 | void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) |
313 | { |
314 | if (doorbell_index) { |
315 | if (!kfd->shared_resources.enable_mes) |
316 | ida_simple_remove(&kfd->doorbell_ida, doorbell_index); |
317 | else |
318 | amdgpu_mes_free_process_doorbells( |
319 | (struct amdgpu_device *)kfd->adev, |
320 | doorbell_index); |
321 | } |
322 | } |
323 | |