// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
		HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
		HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
		HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)

#define MAX_TS_ITER_NUM 100

/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status {
	CS_WAIT_STATUS_BUSY,
	CS_WAIT_STATUS_COMPLETED,
	CS_WAIT_STATUS_GONE
};

/*
 * Data used while handling wait/timestamp nodes.
 * The purpose of this struct is to store the needed data for both operations
 * in one variable instead of passing a large number of arguments to functions.
 */
struct wait_interrupt_data {
	struct hl_user_interrupt *interrupt;
	struct hl_mmap_mem_buf *buf;
	struct hl_mem_mgr *mmg;
	struct hl_cb *cq_cb;
	u64 ts_handle;
	u64 ts_offset;
	u64 cq_handle;
	u64 cq_offset;
	u64 target_value;
	u64 intr_timeout_us;
};

static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);

static void hl_push_cs_outcome(struct hl_device *hdev,
		struct hl_cs_outcome_store *outcome_store,
		u64 seq, ktime_t ts, int error)
{
	struct hl_cs_outcome *node;
	unsigned long flags;

	/*
	 * CS outcome store supports the following operations:
	 * push outcome - store a recent CS outcome in the store
	 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
	 * It uses 2 lists: used list and free list.
	 * It has a pre-allocated amount of nodes, each node stores
	 * a single CS outcome.
	 * Initially, all the nodes are in the free list.
	 * On push outcome, a node (any) is taken from the free list, its
	 * information is filled in, and the node is moved to the used list.
	 * It is possible that there are no nodes left in the free list.
	 * In this case, we will lose some information about old outcomes. We
	 * will pop the OLDEST node from the used list, and make it free.
	 * On pop, the node is searched for in the used list (using a search
	 * index).
	 * If found, the node is then removed from the used list, and moved
	 * back to the free list. The outcome data that the node contained is
	 * returned back to the user.
	 */

	spin_lock_irqsave(&outcome_store->db_lock, flags);

	if (list_empty(&outcome_store->free_list)) {
		node = list_last_entry(&outcome_store->used_list,
				struct hl_cs_outcome, list_link);
		hash_del(&node->map_link);
		dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
	} else {
		node = list_last_entry(&outcome_store->free_list,
				struct hl_cs_outcome, list_link);
	}

	list_del_init(&node->list_link);

	node->seq = seq;
	node->ts = ts;
	node->error = error;

	list_add(&node->list_link, &outcome_store->used_list);
	hash_add(outcome_store->outcome_map, &node->map_link, node->seq);

	spin_unlock_irqrestore(&outcome_store->db_lock, flags);
}

static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
		u64 seq, ktime_t *ts, int *error)
{
	struct hl_cs_outcome *node;
	unsigned long flags;

	spin_lock_irqsave(&outcome_store->db_lock, flags);

	hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
		if (node->seq == seq) {
			*ts = node->ts;
			*error = node->error;

			hash_del(&node->map_link);
			list_del_init(&node->list_link);
			list_add(&node->list_link, &outcome_store->free_list);

			spin_unlock_irqrestore(&outcome_store->db_lock, flags);

			return true;
		}

	spin_unlock_irqrestore(&outcome_store->db_lock, flags);

	return false;
}
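
/*
 * Editorial note: within this file, outcomes are pushed from cs_do_release()
 * when the CS was submitted with HL_CS_FLAGS_TIMESTAMP set; the matching
 * pop-by-sequence is expected to happen on the CS-wait path, so a waiter can
 * still report the stored timestamp/error after the fence itself is gone
 * (see CS_WAIT_STATUS_GONE above).
 */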

static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);

	hdev->asic_funcs->reset_sob(hdev, hw_sob);

	hw_sob->need_reset = false;
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}

void hw_sob_put(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset);
}

static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset_error);
}

void hw_sob_get(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_get(&hw_sob->kref);
}

/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}
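
/*
 * Worked example (editorial, not from the original source): with sob_base = 3
 * and sob_mask = 0x1, the single monitored SOB is the base itself, so
 * *mask = (u8)~(1 << 3) = 0xf7. With sob_mask = 0x5 (offsets 0 and 2 from the
 * base), the msb is bit 2, which must fit in the window of
 * HL_MAX_SOBS_PER_MONITOR SOBs starting at (sob_base & 0x7); the result is
 * *mask = (u8)~0x5 = 0xfa. The inversion suggests the monitor treats cleared
 * mask bits as "watch this SOB", though that is hardware-defined.
 */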

static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);

	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (IS_ERR_OR_NULL(fence))
		return;
	kref_put(&fence->refcount, hl_fence_release);
}

void hl_fences_put(struct hl_fence **fence, int len)
{
	int i;

	for (i = 0; i < len; i++, fence++)
		hl_fence_put(*fence);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
	kref_init(&fence->refcount);
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	fence->mcs_handling_done = false;
	init_completion(&fence->completion);
}

void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void hl_cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}

bool cs_needs_completion(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the last CS in sequence should
	 * get a completion; any non-staged CS will always get a completion
	 */
	if (cs->staged_cs && !cs->staged_last)
		return false;

	return true;
}

bool cs_needs_timeout(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the first CS in sequence should
	 * get a timeout; any non-staged CS will always get a timeout
	 */
	if (cs->staged_cs && !cs->staged_first)
		return false;

	return true;
}
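
/*
 * Editorial summary of the two helpers above: in a staged submission the CS
 * marked 'staged_first' owns the TDR timeout and the CS marked 'staged_last'
 * owns the completion; a standalone (non-staged) CS owns both. A single-CS
 * staged submission carries both indications and thus behaves like a
 * standalone CS.
 */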

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/* Patched CB is created for external queues jobs */
	return (job->queue_type == QUEUE_TYPE_EXT);
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job	: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;
	parser.completion = cs_needs_completion(job->cs);

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;
			atomic_inc(&job->patched_cb->cs_cnt);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}

static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			atomic_dec(&job->patched_cb->cs_cnt);
			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by the driver,
	 * the user CB isn't released in cs_parser() and thus should be
	 * released here. This is also true for INT queues jobs which were
	 * allocated by the driver.
	 */
	if (job->is_kernel_allocated_cb &&
			(job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	/* We decrement the reference only for a CS that gets completion,
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In staged submission, only the last CS marked as 'staged_last'
	 * gets completion, hence its release function will be called from here.
	 * As for all the other CSs in the staged submission which do not get
	 * completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
	if (cs_needs_completion(cs) &&
			(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {

		/* In CS based completions, the timestamp is already available,
		 * so no need to extract it from the job
		 */
		if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
			cs->completion_timestamp = job->timestamp;

		cs_put(cs);
	}

	hl_cs_job_put(job);
}

/*
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *cs;

	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
		if (cs->staged_cs && cs->staged_first &&
				cs->sequence == cs_seq)
			return cs;

	return NULL;
}

/*
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 *
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *last_entry;

	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
								staged_cs_node);

	if (last_entry->staged_last)
		return true;

	return false;
}

/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 * @cs_seq: staged submission sequence number
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* Only the last CS in this staged submission will get a completion.
	 * We must increment the reference for all other CS's in this
	 * staged submission.
	 * Once we get a completion we will release the whole staged submission.
	 */
	if (!cs->staged_last)
		cs_get(cs);
}

/*
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: CS to put
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* We release all CS's in a staged submission except the last
	 * CS, whose reference we have never incremented.
	 */
	if (!cs_needs_completion(cs))
		cs_put(cs);
}

static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *next = NULL, *iter, *first_cs;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first, which is
	 * the CS marked as 'staged_last'.
	 * In case a single staged cs was submitted which has both first and
	 * last indications, then "cs_find_first" below will return NULL,
	 * since we removed the cs node from the list before getting here.
	 * In such cases just continue with the cs to cancel its TDR work.
	 */
	if (cs->staged_cs && cs->staged_last) {
		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(iter)) {
			next = iter;
			break;
		}

	if (next && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}

/*
 * force_complete_multi_cs - complete all contexts that wait on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 */
static void force_complete_multi_cs(struct hl_device *hdev)
{
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		struct multi_cs_completion *mcs_compl;

		mcs_compl = &hdev->multi_cs_completion[i];

		spin_lock(&mcs_compl->lock);

		if (!mcs_compl->used) {
			spin_unlock(&mcs_compl->lock);
			continue;
		}

		/* when calling force complete no context should be waiting on
		 * multi-CS.
		 * We are calling the function as a protection for such case,
		 * to free any pending context and print an error message
		 */
		dev_err(hdev->dev,
			"multi-CS completion context %d still waiting when calling force completion\n",
			i);
		complete_all(&mcs_compl->completion);
		spin_unlock(&mcs_compl->lock);
	}
}

/*
 * complete_multi_cs - complete all waiting entities on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 * @cs: CS structure
 * The function signals a waiting entity that has overlapping stream masters
 * with the completed CS.
 * For example:
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
 *   common stream master QID
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 4. send signal as stream
 *   master QID 4 is common
 */
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_fence *fence = cs->fence;
	int i;

	/* in case of multi CS check for completion only for the first CS */
	if (cs->staged_cs && !cs->staged_first)
		return;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		struct multi_cs_completion *mcs_compl;

		mcs_compl = &hdev->multi_cs_completion[i];
		if (!mcs_compl->used)
			continue;

		spin_lock(&mcs_compl->lock);

		/*
		 * complete if:
		 * 1. still waiting for completion
		 * 2. the completed CS has at least one overlapping stream
		 *    master with the stream masters in the completion
		 */
		if (mcs_compl->used &&
				(fence->stream_master_qid_map &
					mcs_compl->stream_master_qid_map)) {
			/* extract the timestamp only of first completed CS */
			if (!mcs_compl->timestamp)
				mcs_compl->timestamp = ktime_to_ns(fence->timestamp);

			complete_all(&mcs_compl->completion);

			/*
			 * Setting mcs_handling_done inside the lock ensures
			 * at least one fence has mcs_handling_done set to
			 * true before wait for mcs finish. This ensures at
			 * least one CS will be set as completed when polling
			 * mcs fences.
			 */
			fence->mcs_handling_done = true;
		}

		spin_unlock(&mcs_compl->lock);
	}
	/* In case CS completed without mcs completion initialized */
	fence->mcs_handling_done = true;
}

static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
					struct hl_cs *cs,
					struct hl_cs_compl *hl_cs_cmpl)
{
	/* Skip this handler if the cs wasn't submitted, to avoid putting
	 * the hw_sob twice, since that case is already handled at this point.
	 * Also skip if the hw_sob pointer wasn't set.
	 */
	if (!hl_cs_cmpl->hw_sob || !cs->submitted)
		return;

	spin_lock(&hl_cs_cmpl->lock);

	/*
	 * we get refcount upon reservation of signals or signal/wait cs for the
	 * hw_sob object, and need to put it when the first staged cs
	 * (which contains the encaps signals) or cs signal/wait is completed.
	 */
	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
			(!!hl_cs_cmpl->encaps_signals)) {
		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		hw_sob_put(hl_cs_cmpl->hw_sob);

		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
			hdev->asic_funcs->reset_sob_group(hdev,
					hl_cs_cmpl->sob_group);
	}

	spin_unlock(&hl_cs_cmpl->lock);
}

static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_cs_compl *hl_cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);

	cs->completed = true;

	/*
	 * Although if we reached here it means that all external jobs have
	 * finished, because each one of them took refcnt to CS, we still
	 * need to go over the internal jobs and complete them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		hl_complete_job(hdev, job);

	if (!cs->submitted) {
		/*
		 * In case the wait for signal CS was submitted, the fence put
		 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
		 * right before hanging on the PQ.
		 */
		if (cs->type == CS_TYPE_WAIT ||
				cs->type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(cs->signal_fence);

		goto out;
	}

	/* Need to update CI for all queue jobs that do not get completion */
	hl_hw_queue_update_ci(cs);

	/* remove CS from CS mirror list */
	spin_lock(&hdev->cs_mirror_lock);
	list_del_init(&cs->mirror_node);
	spin_unlock(&hdev->cs_mirror_lock);

	cs_handle_tdr(hdev, cs);

	if (cs->staged_cs) {
		/* the completion CS decrements reference for the entire
		 * staged submission
		 */
		if (cs->staged_last) {
			struct hl_cs *staged_cs, *tmp_cs;

			list_for_each_entry_safe(staged_cs, tmp_cs,
					&cs->staged_cs_node, staged_cs_node)
				staged_cs_put(hdev, staged_cs);
		}

		/* A staged CS will be a member in the list only after it
		 * was submitted. We used 'cs_mirror_lock' when inserting
		 * it to the list, so we will use it again when removing it
		 */
		if (cs->submitted) {
			spin_lock(&hdev->cs_mirror_lock);
			list_del(&cs->staged_cs_node);
			spin_unlock(&hdev->cs_mirror_lock);
		}

		/* decrement refcount to handle when first staged cs
		 * with encaps signals is completed.
		 */
		if (hl_cs_cmpl->encaps_signals)
			kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
					hl_encaps_release_handle_and_put_ctx);
	}

	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
		kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);

out:
	/* Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;

	/* We need to mark an error for the not-submitted case because in that
	 * case the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
		cs->fence->error = -EIO;
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	if (unlikely(cs->skip_reset_on_timeout)) {
		dev_err(hdev->dev,
			"Command submission %llu completed after %llu (s)\n",
			cs->sequence,
			div_u64(jiffies - cs->submission_time_jiffies, HZ));
	}

	if (cs->timestamp) {
		cs->fence->timestamp = cs->completion_timestamp;
		hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
				   cs->fence->timestamp, cs->fence->error);
	}

	hl_ctx_put(cs->ctx);

	complete_all(&cs->fence->completion);
	complete_multi_cs(hdev, cs);

	cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);

	hl_fence_put(cs->fence);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}

static void cs_timedout(struct work_struct *work)
{
	struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
	bool skip_reset_on_timeout, device_reset = false;
	struct hl_device *hdev;
	u64 event_mask = 0x0;
	uint timeout_sec;
	int rc;

	skip_reset_on_timeout = cs->skip_reset_on_timeout;

	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	hdev = cs->ctx->hdev;

	if (likely(!skip_reset_on_timeout)) {
		if (hdev->reset_on_lockup)
			device_reset = true;
		else
			hdev->reset_info.needs_reset = true;

		/* Mark the CS as timed out so we won't try to cancel its TDR */
		cs->timedout = true;
	}

	/* Save only the first CS timeout parameters */
	rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
	if (rc) {
		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
		hdev->captured_err_info.cs_timeout.seq = cs->sequence;
		event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
	}

	timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;

	switch (cs->type) {
	case CS_TYPE_SIGNAL:
		dev_err(hdev->dev,
			"Signal command submission %llu has not finished in %u seconds!\n",
			cs->sequence, timeout_sec);
		break;

	case CS_TYPE_WAIT:
		dev_err(hdev->dev,
			"Wait command submission %llu has not finished in %u seconds!\n",
			cs->sequence, timeout_sec);
		break;

	case CS_TYPE_COLLECTIVE_WAIT:
		dev_err(hdev->dev,
			"Collective Wait command submission %llu has not finished in %u seconds!\n",
			cs->sequence, timeout_sec);
		break;

	default:
		dev_err(hdev->dev,
			"Command submission %llu has not finished in %u seconds!\n",
			cs->sequence, timeout_sec);
		break;
	}

	rc = hl_state_dump(hdev);
	if (rc)
		dev_err(hdev->dev, "Error during system state dump %d\n", rc);

	cs_put(cs);

	if (device_reset) {
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
	} else if (event_mask) {
		hl_notifier_event_send_all(hdev, event_mask);
	}
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
			struct hl_cs **cs_new, u32 flags, u32 timeout)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *other = NULL;
	struct hl_cs_compl *cs_cmpl;
	struct hl_cs *cs;
	int rc;

	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		cs = kzalloc(sizeof(*cs), GFP_KERNEL);

	if (!cs) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	/* increment refcnt for context */
	hl_ctx_get(ctx);

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
	cs->timeout_jiffies = timeout;
	cs->skip_reset_on_timeout =
		hdev->reset_info.skip_reset_on_timeout ||
		!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
	cs->submission_time_jiffies = jiffies;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl)
		cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);

	if (!cs_cmpl) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt)
		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
				sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);

	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs_cmpl;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];

	if (other && !completion_done(&other->completion)) {
		/* If the following statement is true, it means we have reached
		 * a point in which only part of the staged submission was
		 * submitted and we don't have enough room in the 'cs_pending'
		 * array for the rest of the submission.
		 * This causes a deadlock because this CS will never be
		 * completed as it depends on future CS's for completion.
		 */
		if (other->cs_sequence == user_sequence)
			dev_crit_ratelimited(hdev->dev,
				"Staged CS %llu deadlock due to lack of resources",
				user_sequence);

		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flight CSs\n");
		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
		rc = -EAGAIN;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
			&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	hl_fence_get(&cs_cmpl->base_fence);

	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs->jobs_in_queue_cnt);
free_cs_cmpl:
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	hl_ctx_put(ctx);
	return rc;
}
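
/*
 * Editorial note on the flow above: 'cs_pending' is a power-of-two ring
 * indexed by (sequence & (max_pending_cs - 1)). A new CS may take a slot only
 * once the fence currently occupying it has completed; otherwise the
 * submission is rejected with -EAGAIN, which user-space is expected to treat
 * as "retry later".
 */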

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	staged_cs_put(hdev, cs);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		hl_complete_job(hdev, job);
}

/*
 * release_reserved_encaps_signals() - release reserved encapsulated signals.
 * @hdev: pointer to habanalabs device structure
 *
 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
 * For these signals we also need to put the refcount of the H/W SOB which was taken at the
 * reservation.
 */
static void release_reserved_encaps_signals(struct hl_device *hdev)
{
	struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
	struct hl_cs_encaps_sig_handle *handle;
	struct hl_encaps_signals_mgr *mgr;
	u32 id;

	if (!ctx)
		return;

	mgr = &ctx->sig_mgr;

	idr_for_each_entry(&mgr->handles, handle, id)
		if (handle->cs_seq == ULLONG_MAX)
			kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);

	hl_ctx_put(ctx);
}

void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
{
	int i;
	struct hl_cs *cs, *tmp;

	if (!skip_wq_flush) {
		flush_workqueue(hdev->ts_free_obj_wq);

		/* flush all completions before iterating over the CS mirror list in
		 * order to avoid a race with the release functions
		 */
		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
			flush_workqueue(hdev->cq_wq[i]);

		flush_workqueue(hdev->cs_cmplt_wq);
	}

	/* Make sure we don't have leftovers in the CS mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}

	force_complete_multi_cs(hdev);

	release_reserved_encaps_signals(hdev);
}

static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct hl_user_pending_interrupt *pend, *temp;
	unsigned long flags;

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
		pend->fence.error = -EIO;
		complete_all(&pend->fence.completion);
	}
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	spin_lock_irqsave(&interrupt->ts_list_lock, flags);
	list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
		list_del(&pend->list_node);
		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
		hl_cb_put(pend->ts_reg_info.cq_cb);
	}
	spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
}

void hl_release_pending_user_interrupts(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_user_interrupt *interrupt;
	int i;

	if (!prop->user_interrupt_count)
		return;

	/* We iterate through the user interrupt requests and wake up all
	 * user threads waiting for interrupt completion. We iterate the
	 * list under a lock, this is why all user threads, once awake,
	 * will wait on the same lock and will release the waiting object upon
	 * unlock.
	 */

	for (i = 0 ; i < prop->user_interrupt_count ; i++) {
		interrupt = &hdev->user_interrupt[i];
		wake_pending_user_interrupt_threads(interrupt);
	}

	interrupt = &hdev->common_user_cq_interrupt;
	wake_pending_user_interrupt_threads(interrupt);

	interrupt = &hdev->common_decoder_interrupt;
	wake_pending_user_interrupt_threads(interrupt);
}

static void force_complete_cs(struct hl_device *hdev)
{
	struct hl_cs *cs;

	spin_lock(&hdev->cs_mirror_lock);

	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
		cs->fence->error = -EIO;
		complete_all(&cs->fence->completion);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}

void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
{
	force_complete_cs(hdev);
	force_complete_multi_cs(hdev);
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	hl_complete_job(hdev, job);
}

static void cs_completion(struct work_struct *work)
{
	struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		hl_complete_job(hdev, job);
}

u32 hl_get_active_cs_num(struct hl_device *hdev)
{
	u32 active_cs_num = 0;
	struct hl_cs *cs;

	spin_lock(&hdev->cs_mirror_lock);

	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
		if (!cs->completed)
			active_cs_num++;

	spin_unlock(&hdev->cs_mirror_lock);

	return active_cs_num;
}

static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is not applicable\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->binned) {
		dev_err(hdev->dev, "Queue index %d is binned out\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	/* When hw queue type isn't QUEUE_TYPE_HW,
	 * USER_ALLOC_CB flag shall be referred as "don't care".
	 */
	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support user CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = false;
		} else {
			if (!(hw_queue_prop->cb_alloc_flags &
					CB_ALLOC_KERNEL)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support kernel CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = true;
		}
	} else {
		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
						& CB_ALLOC_KERNEL);
	}

	*queue_type = hw_queue_prop->type;
	return 0;
}

static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_mem_mgr *mmg,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;

	cb = hl_cb_get(mmg, chunk->cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	atomic_inc(&cb->cs_cnt);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		job = kzalloc(sizeof(*job), GFP_KERNEL);

	if (!job)
		return NULL;

	kref_init(&job->refcount);
	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}

static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
{
	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
		return CS_TYPE_SIGNAL;
	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
		return CS_TYPE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
		return CS_TYPE_COLLECTIVE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
		return CS_RESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
		return CS_UNRESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
		return CS_TYPE_ENGINE_CORE;
	else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
		return CS_TYPE_ENGINES;
	else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
		return CS_TYPE_FLUSH_PCI_HBW_WRITES;
	else
		return CS_TYPE_DEFAULT;
}

static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u32 cs_type_flags, num_chunks;
	enum hl_device_status status;
	enum hl_cs_type cs_type;
	bool is_sync_stream;
	int i;

	for (i = 0 ; i < sizeof(args->in.pad) ; i++)
		if (args->in.pad[i]) {
			dev_dbg(hdev->dev, "Padding bytes must be 0\n");
			return -EINVAL;
		}

	if (!hl_device_operational(hdev, &status))
		return -EBUSY;

	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!hdev->supports_staged_submission) {
		dev_err(hdev->dev, "staged submission not supported");
		return -EPERM;
	}

	cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;

	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
		dev_err(hdev->dev,
			"CS type flags are mutually exclusive, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	cs_type = hl_cs_get_cs_type(cs_type_flags);
	num_chunks = args->in.num_chunks_execute;

	is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT);

	if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		return -EINVAL;
	}

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks) {
			dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
			return -EINVAL;
		}
	} else if (is_sync_stream && num_chunks != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	return 0;
}

static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks,
					struct hl_ctx *ctx)
{
	u32 size_to_copy;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		return -EINVAL;
	}

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array)
		*cs_chunk_array = kmalloc_array(num_chunks,
					sizeof(**cs_chunk_array), GFP_KERNEL);
	if (!*cs_chunk_array) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		kfree(*cs_chunk_array);
		return -EFAULT;
	}

	return 0;
}

static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
				u64 sequence, u32 flags,
				u32 encaps_signal_handle)
{
	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
		return 0;

	cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
	cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);

	if (cs->staged_first) {
		/* Staged CS sequence is the first CS sequence */
		INIT_LIST_HEAD(&cs->staged_cs_node);
		cs->staged_sequence = cs->sequence;

		if (cs->encaps_signals)
			cs->encaps_sig_hdl_id = encaps_signal_handle;
	} else {
		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
		 * under the cs_mirror_lock
		 */
		cs->staged_sequence = sequence;
	}

	/* Increment CS reference if needed */
	staged_cs_get(hdev, cs);

	cs->staged_cs = true;

	return 0;
}
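
/*
 * Illustration (editorial): a three-part staged submission is expected to be
 * submitted with HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
 * for the first CS, HL_CS_FLAGS_STAGED_SUBMISSION alone for the middle CS,
 * and HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_LAST for
 * the final one. All three end up sharing the first CS's sequence number as
 * their 'staged_sequence'.
 */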

static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
{
	int i;

	for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
		if (qid == hdev->stream_master_qid_arr[i])
			return BIT(i);

	return 0;
}
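
/*
 * Example with hypothetical values: if hdev->stream_master_qid_arr holds
 * {8, 9, 12, 13}, then qid 12 maps to BIT(2) = 0x4. The per-CS bitmap built
 * from these masks is what complete_multi_cs() intersects with a waiter's
 * stream_master_qid_map to decide whether to signal it.
 */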

static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq, u32 flags,
				u32 encaps_signals_handle, u32 timeout,
				u16 *signal_initial_sob_count)
{
	bool staged_mid, int_queues_only = true, using_hw_queues = false;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u64 user_sequence;
	u8 stream_master_qid_map = 0;
	int rc, i;

	cntr = &hdev->aggregated_cs_counters;
	user_sequence = *cs_seq;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			hpriv->ctx);
	if (rc)
		goto out;

	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		staged_mid = true;
	else
		staged_mid = false;

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
			staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
			timeout);
	if (rc)
		goto free_cs_chunk_array;

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	rc = cs_staged_submission(hdev, cs, user_sequence, flags,
						encaps_signals_handle);
	if (rc)
		goto free_cs_object;

	/* If this is a staged submission we must return the staged sequence
	 * rather than the internal CS sequence
	 */
	if (cs->staged_cs)
		*cs_seq = cs->staged_sequence;

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
			if (!cb) {
				atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
				atomic64_inc(&cntr->validation_drop_cnt);
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT ||
				queue_type == QUEUE_TYPE_HW) {
			int_queues_only = false;

			/*
			 * store which streams are being used for external/HW
			 * queues of this CS
			 */
			if (hdev->supports_wait_for_multi_cs)
				stream_master_qid_map |=
					get_stream_master_qid_mask(hdev,
							chunk->queue_index);
		}

		if (queue_type == QUEUE_TYPE_HW)
			using_hw_queues = true;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&cntr->out_of_mem_drop_cnt);
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;

			goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
		cs->jobs_cnt++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user, and all its resources can
		 * be freed. Only increment for JOBs on external or H/W queues,
		 * because only for those JOBs we get completion
		 */
		if (cs_needs_completion(cs) &&
			(job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW))
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	/* We allow a CS with any queue type combination as long as it does
	 * not get a completion
	 */
	if (int_queues_only && cs_needs_completion(cs)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	if (using_hw_queues)
		INIT_WORK(&cs->finish_work, cs_completion);

	/*
	 * store the (external/HW queues) streams used by the CS in the
	 * fence object for multi-CS completion
	 */
	if (hdev->supports_wait_for_multi_cs)
		cs->fence->stream_master_qid_map = stream_master_qid_map;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	atomic_dec(&cb->cs_cnt);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	bool need_soft_reset = false;
	int rc = 0, do_ctx_switch = 0;
	void __user *chunks;
	u32 num_chunks, tmp;
	u16 sob_count;
	int ret;

	if (hdev->supports_ctx_switch)
		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not
				 * IDLE while we want to do a context switch
				 * (-EBUSY), we need to soft-reset because QMAN
				 * is probably stuck. However, we can't call
				 * reset here directly because of a deadlock,
				 * so we need to do it at the very end of this
				 * function.
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		if (!num_chunks) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
					cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks) {
			enum hl_cs_wait_status status;

			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					*cs_seq, &status, NULL);
			if (ret) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %d\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		if (hdev->supports_ctx_switch)
			ctx->thread_ctx_switch_wait_token = 1;

	} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

out:
	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
		hl_device_reset(hdev, 0);

	return rc;
}
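
/*
 * The one-shot gate above relies on atomic_cmpxchg(): only the first
 * thread that swaps thread_ctx_switch_token from 1 to 0 performs the
 * context switch; every other thread skips it and polls the wait token
 * instead. A minimal sketch of the same pattern (names are illustrative,
 * not driver API):
 *
 *	static atomic_t once_token = ATOMIC_INIT(1);
 *
 *	if (atomic_cmpxchg(&once_token, 1, 0) == 1)
 *		do_one_time_init();	// exactly one caller enters here
 */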

/*
 * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
 * If the SOB value reaches the max value, move to the other SOB reserved
 * for the queue.
 * @hdev: pointer to device structure
 * @q_idx: stream queue index
 * @hw_sob: the H/W SOB used in this signal CS.
 * @count: signals count
 * @encaps_sig: tells whether it's a reservation for encaps signals or not.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 */
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *sob = *hw_sob, *other_sob;
	u8 other_sob_offset;

	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	hw_sob_get(sob);

	/* check for wraparound */
	if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount right before calling this
		 * function.
		 */
		hw_sob_put_err(sob);

		/*
		 * check the other sob value, if it is still in use then fail,
		 * otherwise make the switch
		 */
		other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
		other_sob = &prop->hw_sob[other_sob_offset];

		if (kref_read(&other_sob->kref) != 1) {
			dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
								q_idx);
			return -EINVAL;
		}

		/*
		 * next_sob_val always points to the next available signal
		 * in the sob, so in encaps signals it will be the next one
		 * after reserving the required amount.
		 */
		if (encaps_sig)
			prop->next_sob_val = count + 1;
		else
			prop->next_sob_val = count;

		/* only two SOBs are currently in use */
		prop->curr_sob_offset = other_sob_offset;
		*hw_sob = other_sob;

		/*
		 * check if other_sob needs a reset, then do it before using it
		 * for the reservation or the next signal cs.
		 * we do it here, for both the encaps and regular signal cs
		 * cases, in order to avoid a possible race of two kref_put
		 * calls on the sob which could occur at the same time if we
		 * moved the sob reset (kref_put) to the cs_do_release
		 * function.
		 * in addition, if we have a combination of cs signal and
		 * encaps, and at the point we need to reset the sob there
		 * were no more reservations and only signal cs keep coming,
		 * in such a case we need signal_cs to put the refcount and
		 * reset the sob.
		 */
		if (other_sob->need_reset)
			hw_sob_put(other_sob);

		if (encaps_sig) {
			/* set reset indication for the sob */
			sob->need_reset = true;
			hw_sob_get(other_sob);
		}

		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
				prop->curr_sob_offset, q_idx);
	} else {
		prop->next_sob_val += count;
	}

	return 0;
}
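
/*
 * Worked example of the wraparound arithmetic above (the HL_MAX_SOB_VAL
 * value here is an assumption for illustration): with
 * HL_MAX_SOB_VAL = 1 << 15, next_sob_val = 32760 and count = 10, the sum
 * 32770 >= 32768 triggers the switch to the other reserved SOB, where
 * next_sob_val restarts at 10 for a regular signal CS, or at 11 for an
 * encaps reservation, since for encaps the value must point at the first
 * signal after the reserved range. In the common (no wraparound) case the
 * value simply advances by count.
 */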

static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
		bool encaps_signals)
{
	u64 *signal_seq_arr = NULL;
	u32 size_to_copy, signal_seq_arr_len;
	int rc = 0;

	if (encaps_signals) {
		*signal_seq = chunk->encaps_signal_seq;
		return 0;
	}

	signal_seq_arr_len = chunk->num_signal_seq_arr;

	/* currently only one signal seq is supported */
	if (signal_seq_arr_len != 1) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Wait for signal CS supports only one signal CS seq\n");
		return -EINVAL;
	}

	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr)
		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
						sizeof(*signal_seq_arr),
						GFP_KERNEL);
	if (!signal_seq_arr) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
	if (copy_from_user(signal_seq_arr,
				u64_to_user_ptr(chunk->signal_seq_arr),
				size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Failed to copy signal seq array from user\n");
		rc = -EFAULT;
		goto out;
	}

	/* currently it is guaranteed to have only one signal seq */
	*signal_seq = signal_seq_arr[0];

out:
	kfree(signal_seq_arr);

	return rc;
}
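
/*
 * The allocation in cs_ioctl_extract_signal_seq() tries GFP_ATOMIC first
 * (cannot sleep, fails fast under memory pressure) and only then falls
 * back to GFP_KERNEL, which may sleep but is far more likely to succeed.
 * A minimal sketch of this fallback pattern (illustrative, not a driver
 * helper):
 *
 *	void *p = kmalloc(len, GFP_ATOMIC);
 *	if (!p)
 *		p = kmalloc(len, GFP_KERNEL);	// may sleep here
 *	if (!p)
 *		return -ENOMEM;
 */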

static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;

	cntr = &hdev->aggregated_cs_counters;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
			&& cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;
	/*
	 * No need for parsing, the user CB is already the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
	 * the CB idr anymore, and we need to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
	cs->jobs_cnt++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
				u32 q_idx, u32 count,
				u32 *handle_id, u32 *sob_addr,
				u32 *signals_count)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_encaps_sig_handle *handle;
	struct hl_encaps_signals_mgr *mgr;
	struct hl_hw_sob *hw_sob;
	int hdl_id;
	int rc = 0;

	if (count >= HL_MAX_SOB_VAL) {
		dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
						count);
		rc = -EINVAL;
		goto out;
	}

	if (q_idx >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			q_idx);
		rc = -EINVAL;
		goto out;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];

	if (!hw_queue_prop->supports_sync_stream) {
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
									q_idx);
		rc = -EINVAL;
		goto out;
	}

	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle) {
		rc = -ENOMEM;
		goto out;
	}

	handle->count = count;

	hl_ctx_get(hpriv->ctx);
	handle->ctx = hpriv->ctx;
	mgr = &hpriv->ctx->sig_mgr;

	spin_lock(&mgr->lock);
	hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->lock);

	if (hdl_id < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
		rc = -EINVAL;
		goto put_ctx;
	}

	handle->id = hdl_id;
	handle->q_idx = q_idx;
	handle->hdev = hdev;
	kref_init(&handle->refcount);

	hdev->asic_funcs->hw_queues_lock(hdev);

	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	/*
	 * Increment the SOB value by the user-requested count in order to
	 * reserve those signals.
	 * Check whether the amount of signals to reserve would exceed the max
	 * SOB value; if so, switch SOBs.
	 */
	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
								true);
	if (rc) {
		dev_err(hdev->dev, "Failed to switch SOB\n");
		hdev->asic_funcs->hw_queues_unlock(hdev);
		rc = -EINVAL;
		goto remove_idr;
	}
	/* set the hw_sob to the handle after calling the sob wraparound handler
	 * since the sob could have changed.
	 */
	handle->hw_sob = hw_sob;

	/* store the current sob value for the unreserve validity check, and
	 * for signal offset support
	 */
	handle->pre_sob_val = prop->next_sob_val - handle->count;

	handle->cs_seq = ULLONG_MAX;

	*signals_count = prop->next_sob_val;
	hdev->asic_funcs->hw_queues_unlock(hdev);

	*sob_addr = handle->hw_sob->sob_addr;
	*handle_id = hdl_id;

	dev_dbg(hdev->dev,
		"Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
			hw_sob->sob_id, handle->hw_sob->sob_addr,
			prop->next_sob_val - 1, q_idx, hdl_id);
	goto out;

remove_idr:
	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, hdl_id);
	spin_unlock(&mgr->lock);

put_ctx:
	hl_ctx_put(handle->ctx);
	kfree(handle);

out:
	return rc;
}
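
/*
 * Illustrative userspace flow for the reservation above (a sketch only:
 * the fd and error handling are assumptions; the ioctl, flag and field
 * names are taken from uapi/drm/habanalabs_accel.h):
 *
 *	union hl_cs_args args = {0};
 *
 *	args.in.cs_flags = HL_CS_FLAGS_RESERVE_SIGNALS_ONLY;
 *	args.in.encaps_signals_q_idx = q_idx;
 *	args.in.encaps_signals_count = 16;
 *	if (!ioctl(fd, DRM_IOCTL_HL_CS, &args))
 *		handle_id = args.out.handle_id;	// keep for unreserve
 */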

static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
	struct hl_sync_stream_properties *prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_encaps_signals_mgr *mgr;
	struct hl_hw_sob *hw_sob;
	u32 q_idx, sob_addr;
	int rc = 0;

	mgr = &hpriv->ctx->sig_mgr;

	spin_lock(&mgr->lock);
	encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
	if (encaps_sig_hdl) {
		dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
				handle_id, encaps_sig_hdl->hw_sob->sob_addr,
					encaps_sig_hdl->count);

		hdev->asic_funcs->hw_queues_lock(hdev);

		q_idx = encaps_sig_hdl->q_idx;
		prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
		hw_sob = &prop->hw_sob[prop->curr_sob_offset];
		sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);

		/* Check if sob_val got out of sync due to other
		 * signal submission requests which were handled
		 * between the reserve-unreserve calls, or due to a SOB switch
		 * upon reaching the SOB max value.
		 */
		if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
				!= prop->next_sob_val ||
				sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
			dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
				encaps_sig_hdl->pre_sob_val,
				(prop->next_sob_val - encaps_sig_hdl->count));

			hdev->asic_funcs->hw_queues_unlock(hdev);
			rc = -EINVAL;
			goto out_unlock;
		}

		/*
		 * Decrement the SOB value by the user-requested count in
		 * order to unreserve those signals
		 */
		prop->next_sob_val -= encaps_sig_hdl->count;

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hw_sob_put(hw_sob);

		/* Release the id and free allocated memory of the handle */
		idr_remove(&mgr->handles, handle_id);

		/* unlock before calling ctx_put, where we might sleep */
		spin_unlock(&mgr->lock);
		hl_ctx_put(encaps_sig_hdl->ctx);
		kfree(encaps_sig_hdl);
		goto out;
	} else {
		rc = -EINVAL;
		dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
	}

out_unlock:
	spin_unlock(&mgr->lock);

out:
	return rc;
}

static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout,
				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
	bool handle_found = false, is_wait_cs = false,
			wait_cs_submitted = false,
			cs_encaps_signals = false;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	bool staged_cs_with_encaps_signals = false;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (!hdev->nic_ports_mask) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Collective operations not supported when NIC ports are disabled");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT);

	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);

	if (is_wait_cs) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
				ctx, cs_encaps_signals);
		if (rc)
			goto free_cs_chunk_array;

		if (cs_encaps_signals) {
			/* check if cs sequence has encapsulated
			 * signals handle
			 */
			struct idr *idp;
			u32 id;

			spin_lock(&ctx->sig_mgr.lock);
			idp = &ctx->sig_mgr.handles;
			idr_for_each_entry(idp, encaps_sig_hdl, id) {
				if (encaps_sig_hdl->cs_seq == signal_seq) {
					/* get refcount to protect removing this handle from idr,
					 * needed when multiple wait cs are used with offset
					 * to wait on reserved encaps signals.
					 * Since the kref_put of this handle is executed outside
					 * the current lock, it is possible that the handle
					 * refcount is 0 but it has yet to be removed from the
					 * list. In this case we need to consider the handle
					 * as not valid.
					 */
					if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
						handle_found = true;
					break;
				}
			}
			spin_unlock(&ctx->sig_mgr.lock);

			if (!handle_found) {
				/* treat as signal CS already finished */
				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
						signal_seq);
				rc = 0;
				goto free_cs_chunk_array;
			}

			/* validate also the signal offset value */
			if (chunk->encaps_signal_offset >
					encaps_sig_hdl->count) {
				dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
						chunk->encaps_signal_offset,
						encaps_sig_hdl->count);
				rc = -EINVAL;
				goto free_cs_chunk_array;
			}
		}

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		staged_cs_with_encaps_signals = !!
				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
				!staged_cs_with_encaps_signals) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
	if (rc) {
		if (is_wait_cs)
			hl_fence_put(sig_fence);

		goto free_cs_chunk_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 * For the encaps signals case, we save the cs sequence and handle
	 * pointer for later initialization.
	 */
	if (is_wait_cs) {
		cs->signal_fence = sig_fence;
		/* store the handle pointer, so we don't have to
		 * look for it again later in the flow,
		 * when we need to set SOB info in hw_queue.
		 */
		if (cs->encaps_signals)
			cs->encaps_sig_hdl = encaps_sig_hdl;
	}

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx, chunk->encaps_signal_offset);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id,
				chunk->encaps_signal_offset);
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;

	if (q_type == QUEUE_TYPE_HW)
		INIT_WORK(&cs->finish_work, cs_completion);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		/* In case the wait cs failed here, it means the signal cs
		 * already completed. We want to free all of its related
		 * objects, but we don't want to fail the ioctl.
		 */
		if (is_wait_cs)
			rc = 0;
		else if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	*signal_sob_addr_offset = cs->sob_addr_offset;
	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	if (is_wait_cs)
		wait_cs_submitted = true;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
		kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
	kfree(cs_chunk_array);
out:
	return rc;
}

static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
						u32 num_engine_cores, u32 core_command)
{
	struct hl_device *hdev = hpriv->hdev;
	void __user *engine_cores_arr;
	u32 *cores;
	int rc;

	if (!hdev->asic_prop.supports_engine_modes)
		return -EPERM;

	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
		return -EINVAL;
	}

	if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
		dev_err(hdev->dev, "Engine core command is invalid\n");
		return -EINVAL;
	}

	engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
	cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
	if (!cores)
		return -ENOMEM;

	if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
		dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
		kfree(cores);
		return -EFAULT;
	}

	rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
	kfree(cores);

	return rc;
}

static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
						u32 num_engines, enum hl_engine_command command)
{
	struct hl_device *hdev = hpriv->hdev;
	u32 *engines, max_num_of_engines;
	void __user *engines_arr;
	int rc;

	if (!hdev->asic_prop.supports_engine_modes)
		return -EPERM;

	if (command >= HL_ENGINE_COMMAND_MAX) {
		dev_err(hdev->dev, "Engine command is invalid\n");
		return -EINVAL;
	}

	max_num_of_engines = hdev->asic_prop.max_num_of_engines;
	if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
		max_num_of_engines = hdev->asic_prop.num_engine_cores;

	if (!num_engines || num_engines > max_num_of_engines) {
		dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
		return -EINVAL;
	}

	engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
	engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
		dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
		kfree(engines);
		return -EFAULT;
	}

	rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
	kfree(engines);

	return rc;
}

static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (!prop->hbw_flush_reg) {
		dev_dbg(hdev->dev, "HBW flush is not supported\n");
		return -EOPNOTSUPP;
	}

	RREG32(prop->hbw_flush_reg);

	return 0;
}
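
/*
 * The single RREG32() above is the classic flush of PCI posted writes:
 * a read over the same path cannot complete before the writes queued
 * ahead of it, so once the read returns, all prior HBW writes have
 * reached the device. A generic sketch of the pattern (the register
 * name is illustrative):
 *
 *	writel(val, base + REG_DOORBELL);
 *	(void)readl(base + REG_DOORBELL);	// flush the posted write
 */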

int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
	struct hl_fpriv *hpriv = file_priv->driver_priv;
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type = 0;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags, timeout,
		signals_count = 0, sob_addr = 0, handle_id = 0;
	u16 sob_initial_count = 0;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;
	flags = args->in.cs_flags;

	/* In case this is a staged CS, user should supply the CS sequence */
	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
			? msecs_to_jiffies(args->in.timeout * 1000)
			: hpriv->hdev->timeout_jiffies;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
					&cs_seq, args->in.cs_flags, timeout,
					&sob_addr, &sob_initial_count);
		break;
	case CS_RESERVE_SIGNALS:
		rc = cs_ioctl_reserve_signals(hpriv,
					args->in.encaps_signals_q_idx,
					args->in.encaps_signals_count,
					&handle_id, &sob_addr, &signals_count);
		break;
	case CS_UNRESERVE_SIGNALS:
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
	case CS_TYPE_ENGINE_CORE:
		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
				args->in.num_engine_cores, args->in.core_command);
		break;
	case CS_TYPE_ENGINES:
		rc = cs_ioctl_engines(hpriv, args->in.engines,
				args->in.num_engines, args->in.engine_command);
		break;
	case CS_TYPE_FLUSH_PCI_HBW_WRITES:
		rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags,
						args->in.encaps_sig_handle_id,
						timeout, &sob_initial_count);
		break;
	}
out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));

		switch (cs_type) {
		case CS_RESERVE_SIGNALS:
			args->out.handle_id = handle_id;
			args->out.sob_base_addr_offset = sob_addr;
			args->out.count = signals_count;
			break;
		case CS_TYPE_SIGNAL:
			args->out.sob_base_addr_offset = sob_addr;
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		case CS_TYPE_DEFAULT:
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		default:
			args->out.seq = cs_seq;
			break;
		}

		args->out.status = rc;
	}

	return rc;
}
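
/*
 * Illustrative userspace submission through the default path above
 * (a sketch only: fd handling and CB creation are assumptions; the
 * structures come from uapi/drm/habanalabs_accel.h):
 *
 *	struct hl_cs_chunk chunk = {
 *		.cb_handle = cb_handle,
 *		.queue_index = q_idx,
 *		.cb_size = cb_size,
 *	};
 *	union hl_cs_args args = {0};
 *
 *	args.in.chunks_execute = (__u64)(uintptr_t)&chunk;
 *	args.in.num_chunks_execute = 1;
 *	if (!ioctl(fd, DRM_IOCTL_HL_CS, &args))
 *		seq = args.out.seq;	// pass to the wait ioctl later
 */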

static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
				enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp)
{
	struct hl_device *hdev = ctx->hdev;
	ktime_t timestamp_kt;
	long completion_rc;
	int rc = 0, error;

	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
		return rc;
	}

	if (!fence) {
		if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) {
			dev_dbg(hdev->dev,
				"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
				seq, ctx->cs_sequence);
			*status = CS_WAIT_STATUS_GONE;
			return 0;
		}

		completion_rc = 1;
		goto report_results;
	}

	if (!timeout_us) {
		completion_rc = completion_done(&fence->completion);
	} else {
		unsigned long timeout;

		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
				timeout_us : usecs_to_jiffies(timeout_us);
		completion_rc =
			wait_for_completion_interruptible_timeout(
				&fence->completion, timeout);
	}

	error = fence->error;
	timestamp_kt = fence->timestamp;

report_results:
	if (completion_rc > 0) {
		*status = CS_WAIT_STATUS_COMPLETED;
		if (timestamp)
			*timestamp = ktime_to_ns(timestamp_kt);
	} else {
		*status = CS_WAIT_STATUS_BUSY;
	}

	if (completion_rc == -ERESTARTSYS)
		rc = completion_rc;
	else if (error == -ETIMEDOUT || error == -EIO)
		rc = error;

	return rc;
}
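
/*
 * Result mapping of hl_wait_for_fence() at a glance:
 *	completion_rc > 0               -> CS_WAIT_STATUS_COMPLETED (+timestamp)
 *	completion_rc == 0              -> CS_WAIT_STATUS_BUSY (poll miss/timeout)
 *	completion_rc == -ERESTARTSYS   -> rc = -ERESTARTSYS (signal received)
 *	fence->error is -ETIMEDOUT/-EIO -> rc = fence->error (CS itself failed)
 */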

/*
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function sets the stream map of each fence in the fence
 * array in the completion QID stream map to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking context ref
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
	struct hl_fence **fence_ptr = mcs_data->fence_arr;
	struct hl_device *hdev = mcs_data->ctx->hdev;
	int i, rc, arr_len = mcs_data->arr_len;
	u64 *seq_arr = mcs_data->seq_arr;
	ktime_t max_ktime, first_cs_time;
	enum hl_cs_wait_status status;

	memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));

	/* get all fences under the same lock */
	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
	if (rc)
		return rc;

	/*
	 * re-initialize the completion here to handle 2 possible cases:
	 * 1. CS will complete the multi-CS prior to clearing the completion, in which
	 *    case the fence iteration is guaranteed to catch the CS completion.
	 * 2. the completion will occur after re-init of the completion,
	 *    in which case we will wake up immediately in wait_for_completion.
	 */
	reinit_completion(&mcs_compl->completion);

	/*
	 * set to maximum time to verify timestamp is valid: if at the end
	 * this value is maintained - no timestamp was updated
	 */
	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
	first_cs_time = max_ktime;

	for (i = 0; i < arr_len; i++, fence_ptr++) {
		struct hl_fence *fence = *fence_ptr;

		/*
		 * In order to prevent a case where we wait until timeout even though a CS
		 * associated with the multi-CS actually completed, we do things in the
		 * below order:
		 * 1. for each fence set its QID map in the multi-CS completion QID map. This way
		 *    any CS can, potentially, complete the multi CS for the specific QID (note
		 *    that once completion is initialized, calling complete* and then wait on the
		 *    completion will cause it to return at once)
		 * 2. only after allowing multi-CS completion for the specific QID we check whether
		 *    the specific CS already completed (and thus the wait for completion part will
		 *    be skipped). if the CS did not complete, it is guaranteed that a completing CS
		 *    will wake up the completion.
		 */
		if (fence)
			mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;

		/*
		 * function won't sleep as it is called with timeout 0 (i.e.
		 * poll the fence)
		 */
		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL);
		if (rc) {
			dev_err(hdev->dev,
				"wait_for_fence error: %d for CS seq %llu\n",
								rc, seq_arr[i]);
			break;
		}

		switch (status) {
		case CS_WAIT_STATUS_BUSY:
			/* CS did not finish, QID to wait on already stored */
			break;
		case CS_WAIT_STATUS_COMPLETED:
			/*
			 * Using mcs_handling_done to avoid the possibility of mcs_data
			 * returning to the user and indicating CS completed before it
			 * finished all of its mcs handling, to avoid a race the next
			 * time the user waits for mcs.
			 * note: when reaching this case fence is definitely not NULL
			 *       but the NULL check was added to overcome static analysis
			 */
			if (fence && !fence->mcs_handling_done) {
				/*
				 * in case multi CS is completed but MCS handling not done
				 * we "complete" the multi CS to prevent it from waiting
				 * until time-out and the "multi-CS handling done" will have
				 * another chance at the next iteration
				 */
				complete_all(&mcs_compl->completion);
				break;
			}

			mcs_data->completion_bitmap |= BIT(i);
			/*
			 * For all completed CSs we take the earliest timestamp.
			 * For this we have to validate that the timestamp is
			 * earliest of all timestamps so far.
			 */
			if (fence && mcs_data->update_ts &&
					(ktime_compare(fence->timestamp, first_cs_time) < 0))
				first_cs_time = fence->timestamp;
			break;
		case CS_WAIT_STATUS_GONE:
			mcs_data->update_ts = false;
			mcs_data->gone_cs = true;
			/*
			 * It is possible to get an old sequence number from user
			 * which is related to an already completed CS whose fence is
			 * already gone. In this case, the CS is set as completed but
			 * there is no need to consider its QID for mcs completion.
			 */
			mcs_data->completion_bitmap |= BIT(i);
			break;
		default:
			dev_err(hdev->dev, "Invalid fence status\n");
			rc = -EINVAL;
			break;
		}

	}

	hl_fences_put(mcs_data->fence_arr, arr_len);

	if (mcs_data->update_ts &&
			(ktime_compare(first_cs_time, max_ktime) != 0))
		mcs_data->timestamp = ktime_to_ns(first_cs_time);

	return rc;
}

static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{
	struct hl_fence *fence;
	int rc = 0;

	if (timestamp)
		*timestamp = 0;

	hl_ctx_get(ctx);

	fence = hl_ctx_get_fence(ctx, seq);

	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
	hl_fence_put(fence);
	hl_ctx_put(ctx);

	return rc;
}

static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	if (usecs <= U32_MAX)
		return usecs_to_jiffies(usecs);

	/*
	 * If the value in nanoseconds would overflow 64 bits, use the
	 * largest 64-bit value.
	 */
	if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
		return nsecs_to_jiffies(U64_MAX);

	return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
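
/*
 * Worked example for hl_usecs64_to_jiffies(): U64_MAX / NSEC_PER_USEC is
 * roughly 1.8e16 us (about 584 years), so any larger input would overflow
 * the usecs * NSEC_PER_USEC multiplication and is clamped to
 * nsecs_to_jiffies(U64_MAX). Inputs up to U32_MAX (about 71 minutes) take
 * the cheap usecs_to_jiffies() path.
 */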

/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 * @stream_master_bitmap: stream master QIDs map, a set bit indicates a stream
 *                        master QID to wait on
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
 * the function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_compl;
	int i;

	/* find free multi_cs completion structure */
	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_compl = &hdev->multi_cs_completion[i];
		spin_lock(&mcs_compl->lock);
		if (!mcs_compl->used) {
			mcs_compl->used = 1;
			mcs_compl->timestamp = 0;
			/*
			 * init QID map to 0 to avoid completion by CSs. the actual QID map
			 * to multi-CS CSs will be set incrementally at a later stage
			 */
			mcs_compl->stream_master_qid_map = 0;
			spin_unlock(&mcs_compl->lock);
			break;
		}
		spin_unlock(&mcs_compl->lock);
	}

	if (i == MULTI_CS_MAX_USER_CTX) {
		dev_err(hdev->dev, "no available multi-CS completion structure\n");
		return ERR_PTR(-ENOMEM);
	}
	return mcs_compl;
}
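
/*
 * The loop above is a small fixed pool guarded by per-slot locks: each
 * slot is examined under its own spinlock and claimed by setting 'used'
 * before the lock is dropped, so two concurrent callers can never grab
 * the same slot. A minimal sketch of the same idea (types and names are
 * illustrative):
 *
 *	for (i = 0; i < POOL_SIZE; i++) {
 *		spin_lock(&pool[i].lock);
 *		if (!pool[i].used) {
 *			pool[i].used = 1;	// claimed while locked
 *			spin_unlock(&pool[i].lock);
 *			return &pool[i];
 *		}
 *		spin_unlock(&pool[i].lock);
 *	}
 *	return ERR_PTR(-ENOMEM);
 */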

/*
 * hl_wait_multi_cs_completion_fini - return completion structure and set as
 *                                    unused
 *
 * @mcs_compl: pointer to the completion structure
 */
static void hl_wait_multi_cs_completion_fini(
					struct multi_cs_completion *mcs_compl)
{
	/*
	 * free completion structure, do it under lock to be in-sync with the
	 * thread that signals completion
	 */
	spin_lock(&mcs_compl->lock);
	mcs_compl->used = 0;
	spin_unlock(&mcs_compl->lock);
}

/*
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
						struct multi_cs_completion *mcs_compl)
{
	long completion_rc;

	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
									mcs_data->timeout_jiffies);

	/* update timestamp */
	if (completion_rc > 0)
		mcs_data->timestamp = mcs_compl->timestamp;

	if (completion_rc == -ERESTARTSYS)
		return completion_rc;

	mcs_data->wait_status = completion_rc;

	return 0;
}

/*
 * hl_multi_cs_completion_init - init array of multi-CS completion structures
 *
 * @hdev: pointer to habanalabs device structure
 */
void hl_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_cmpl;
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_cmpl = &hdev->multi_cs_completion[i];
		mcs_cmpl->used = 0;
		spin_lock_init(&mcs_cmpl->lock);
		init_completion(&mcs_cmpl->completion);
	}
}

/*
 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
 *
 * @hpriv: pointer to the private data of the fd
 * @data: pointer to multi-CS wait ioctl in/out args
 *
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct multi_cs_completion *mcs_compl;
	struct hl_device *hdev = hpriv->hdev;
	struct multi_cs_data mcs_data = {};
	union hl_wait_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_fence **fence_arr;
	void __user *seq_arr;
	u32 size_to_copy;
	u64 *cs_seq_arr;
	u8 seq_arr_len;
	int rc, i;

	for (i = 0 ; i < sizeof(args->in.pad) ; i++)
		if (args->in.pad[i]) {
			dev_dbg(hdev->dev, "Padding bytes must be 0\n");
			return -EINVAL;
		}

	if (!hdev->supports_wait_for_multi_cs) {
		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
		return -EPERM;
	}

	seq_arr_len = args->in.seq_arr_len;

	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
		return -EINVAL;
	}

	/* allocate memory for sequence array */
	cs_seq_arr =
		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
	if (!cs_seq_arr)
		return -ENOMEM;

	/* copy CS sequence array from user */
	seq_arr = (void __user *) (uintptr_t) args->in.seq;
	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
		rc = -EFAULT;
		goto free_seq_arr;
	}

	/* allocate array for the fences */
	fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
	if (!fence_arr) {
		rc = -ENOMEM;
		goto free_seq_arr;
	}

	/* initialize the multi-CS internal data */
	mcs_data.ctx = ctx;
	mcs_data.seq_arr = cs_seq_arr;
	mcs_data.fence_arr = fence_arr;
	mcs_data.arr_len = seq_arr_len;

	hl_ctx_get(ctx);

	/* wait (with timeout) for the first CS to be completed */
	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
	if (IS_ERR(mcs_compl)) {
		rc = PTR_ERR(mcs_compl);
		goto put_ctx;
	}

	/* poll all CS fences, extract timestamp */
	mcs_data.update_ts = true;
	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
	/*
	 * skip wait for CS completion when one of the below is true:
	 * - an error on the poll function
	 * - one or more CS in the list completed
	 * - the user called ioctl with timeout 0
	 */
	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
		goto completion_fini;

	while (true) {
		rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
		if (rc || (mcs_data.wait_status == 0))
			break;

		/*
		 * poll fences once again to update the CS map.
		 * no timestamp should be updated this time.
		 */
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

		if (rc || mcs_data.completion_bitmap)
			break;

		/*
		 * if hl_wait_multi_cs_completion returned before the timeout (i.e.
		 * it got a completion), it either got completed by a CS in the
		 * multi-CS list (in which case the indication is a non-empty
		 * completion_bitmap), or it got completed by a CS submitted to one
		 * of the shared stream masters but not in the multi-CS list (in
		 * which case we should wait again, but modify the timeout and set
		 * the timestamp to zero to let a CS related to the current
		 * multi-CS set a new, relevant, timestamp)
		 */
		mcs_data.timeout_jiffies = mcs_data.wait_status;
		mcs_compl->timestamp = 0;
	}

completion_fini:
	hl_wait_multi_cs_completion_fini(mcs_compl);

put_ctx:
	hl_ctx_put(ctx);
	kfree(fence_arr);

free_seq_arr:
	kfree(cs_seq_arr);

	if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for Multi-CS\n");
		rc = -EINTR;
	}

	if (rc)
		return rc;

	/* update output args */
	memset(args, 0, sizeof(*args));

	if (mcs_data.completion_bitmap) {
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		args->out.cs_completion_map = mcs_data.completion_bitmap;

		/* if timestamp is not 0 - it's valid */
		if (mcs_data.timestamp) {
			args->out.timestamp_nsec = mcs_data.timestamp;
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		}

		/* update if some CS was gone */
		if (!mcs_data.timestamp)
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
	} else {
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	}

	return 0;
}

static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 seq = args->in.seq;
	s64 timestamp;
	int rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp);

	if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for CS handle %llu\n",
			seq);
		return -EINTR;
	}

	memset(args, 0, sizeof(*args));

	if (rc) {
		if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (timestamp) {
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		args->out.timestamp_nsec = timestamp;
	}

	switch (status) {
	case CS_WAIT_STATUS_GONE:
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
		fallthrough;
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}
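
/*
 * Illustrative userspace wait on a sequence returned by the CS ioctl
 * (a sketch only: fd and error handling are assumptions; the names come
 * from uapi/drm/habanalabs_accel.h):
 *
 *	union hl_wait_cs_args wait = {0};
 *
 *	wait.in.seq = seq;
 *	wait.in.timeout_us = 1000000;	// one second
 *	if (!ioctl(fd, DRM_IOCTL_HL_WAIT_CS, &wait) &&
 *	    wait.out.status == HL_WAIT_CS_STATUS_COMPLETED)
 *		cs_done = true;
 */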
3215 | |
3216 | static inline void set_record_cq_info(struct hl_user_pending_interrupt *record, |
3217 | struct hl_cb *cq_cb, u32 cq_offset, u32 target_value) |
3218 | { |
3219 | record->ts_reg_info.cq_cb = cq_cb; |
3220 | record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; |
3221 | record->cq_target_value = target_value; |
3222 | } |
3223 | |
3224 | static int validate_and_get_ts_record(struct device *dev, |
3225 | struct hl_ts_buff *ts_buff, u64 ts_offset, |
3226 | struct hl_user_pending_interrupt **req_event_record) |
3227 | { |
3228 | struct hl_user_pending_interrupt *ts_cb_last; |
3229 | |
3230 | *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + |
3231 | ts_offset; |
3232 | ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + |
3233 | (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); |
3234 | |
3235 | /* Validate ts_offset not exceeding last max */ |
3236 | if (*req_event_record >= ts_cb_last) { |
3237 | dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n" , |
3238 | ts_offset, (u64)(uintptr_t)ts_cb_last); |
3239 | return -EINVAL; |
3240 | } |
3241 | |
3242 | return 0; |
3243 | } |
3244 | |
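/*
 * Remove a timestamp record from its interrupt's ts list (if it is still in
 * use) and drop the buffer and CQ CB references that were taken when the
 * record was registered. If @need_lock is set, the interrupt's ts_list_lock
 * is taken here; otherwise the caller is assumed to hold it.
 */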
3245 | static void unregister_timestamp_node(struct hl_device *hdev, |
3246 | struct hl_user_pending_interrupt *record, bool need_lock) |
3247 | { |
3248 | struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; |
3249 | bool ts_rec_found = false; |
3250 | unsigned long flags; |
3251 | |
3252 | if (need_lock) |
3253 | spin_lock_irqsave(&interrupt->ts_list_lock, flags); |
3254 | |
3255 | if (record->ts_reg_info.in_use) { |
3256 | record->ts_reg_info.in_use = false; |
		list_del(&record->list_node);
3258 | ts_rec_found = true; |
3259 | } |
3260 | |
3261 | if (need_lock) |
3262 | spin_unlock_irqrestore(lock: &interrupt->ts_list_lock, flags); |
3263 | |
3264 | /* Put refcounts that were taken when we registered the event */ |
3265 | if (ts_rec_found) { |
		hl_mmap_mem_buf_put(record->ts_reg_info.buf);
		hl_cb_put(record->ts_reg_info.cq_cb);
3268 | } |
3269 | } |
3270 | |
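/*
 * Get the timestamp record matching the requested offset, unregister it
 * first if it is already in use (possibly on a different interrupt), and
 * then fill it with the new registration data. Called with the target
 * interrupt's ts_list_lock held; the lock may be dropped and re-acquired
 * internally for the unregister step.
 */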
3271 | static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx, |
3272 | struct wait_interrupt_data *data, unsigned long *flags, |
3273 | struct hl_user_pending_interrupt **pend) |
3274 | { |
3275 | struct hl_user_pending_interrupt *req_offset_record; |
3276 | struct hl_ts_buff *ts_buff = data->buf->private; |
3277 | bool need_lock = false; |
3278 | int rc; |
3279 | |
	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
			&req_offset_record);
3282 | if (rc) |
3283 | return rc; |
3284 | |
3285 | /* In case the node already registered, need to unregister first then re-use */ |
3286 | if (req_offset_record->ts_reg_info.in_use) { |
3287 | dev_dbg(data->buf->mmg->dev, |
3288 | "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n" , |
3289 | req_offset_record, |
3290 | req_offset_record->ts_reg_info.interrupt->interrupt_id, |
3291 | req_offset_record->ts_reg_info.timestamp_kernel_addr, |
3292 | data->interrupt->interrupt_id); |
3293 | /* |
3294 | * Since interrupt here can be different than the one the node currently registered |
3295 | * on, and we don't want to lock two lists while we're doing unregister, so |
3296 | * unlock the new interrupt wait list here and acquire the lock again after you done |
3297 | */ |
3298 | if (data->interrupt->interrupt_id != |
3299 | req_offset_record->ts_reg_info.interrupt->interrupt_id) { |
3300 | |
3301 | need_lock = true; |
3302 | spin_unlock_irqrestore(lock: &data->interrupt->ts_list_lock, flags: *flags); |
3303 | } |
3304 | |
		unregister_timestamp_node(hdev, req_offset_record, need_lock);
3306 | |
3307 | if (need_lock) |
3308 | spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); |
3309 | } |
3310 | |
3311 | /* Fill up the new registration node info and add it to the list */ |
3312 | req_offset_record->ts_reg_info.in_use = true; |
3313 | req_offset_record->ts_reg_info.buf = data->buf; |
3314 | req_offset_record->ts_reg_info.timestamp_kernel_addr = |
3315 | (u64 *) ts_buff->user_buff_address + data->ts_offset; |
3316 | req_offset_record->ts_reg_info.interrupt = data->interrupt; |
	set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
			data->target_value);
3319 | |
3320 | *pend = req_offset_record; |
3321 | |
3322 | return rc; |
3323 | } |
3324 | |
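/*
 * Register a timestamp record on a user interrupt. Once the interrupt fires
 * and the CQ counter reaches the target value, the interrupt handler writes
 * the timestamp into the user's buffer. This call returns immediately and
 * never blocks.
 */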
3325 | static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, |
3326 | struct wait_interrupt_data *data, |
3327 | u32 *status, u64 *timestamp) |
3328 | { |
3329 | struct hl_user_pending_interrupt *pend; |
3330 | unsigned long flags; |
3331 | int rc = 0; |
3332 | |
3333 | hl_ctx_get(ctx); |
3334 | |
3335 | data->cq_cb = hl_cb_get(mmg: data->mmg, handle: data->cq_handle); |
3336 | if (!data->cq_cb) { |
3337 | rc = -EINVAL; |
3338 | goto put_ctx; |
3339 | } |
3340 | |
3341 | /* Validate the cq offset */ |
3342 | if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= |
3343 | ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { |
3344 | rc = -EINVAL; |
3345 | goto put_cq_cb; |
3346 | } |
3347 | |
3348 | dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n" , |
3349 | data->interrupt->interrupt_id, data->ts_handle, |
3350 | data->ts_offset, data->cq_offset); |
3351 | |
3352 | data->buf = hl_mmap_mem_buf_get(mmg: data->mmg, handle: data->ts_handle); |
3353 | if (!data->buf) { |
3354 | rc = -EINVAL; |
3355 | goto put_cq_cb; |
3356 | } |
3357 | |
3358 | spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); |
3359 | |
3360 | /* get ts buffer record */ |
	rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3362 | if (rc) { |
		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3364 | goto put_ts_buff; |
3365 | } |
3366 | |
3367 | /* We check for completion value as interrupt could have been received |
3368 | * before we add the timestamp node to the ts list. |
3369 | */ |
3370 | if (*pend->cq_kernel_addr >= data->target_value) { |
		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3372 | |
3373 | dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n" , |
3374 | pend, data->ts_offset, data->interrupt->interrupt_id); |
3375 | |
3376 | pend->ts_reg_info.in_use = 0; |
3377 | *status = HL_WAIT_CS_STATUS_COMPLETED; |
3378 | *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); |
3379 | |
3380 | goto put_ts_buff; |
3381 | } |
3382 | |
	list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
	spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3385 | |
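	/* HL_WAIT_CS_STATUS_COMPLETED is 0 in the uAPI, so this also sets the
	 * success return code.
	 */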
3386 | rc = *status = HL_WAIT_CS_STATUS_COMPLETED; |
3387 | |
3388 | hl_ctx_put(ctx); |
3389 | |
3390 | return rc; |
3391 | |
3392 | put_ts_buff: |
3393 | hl_mmap_mem_buf_put(buf: data->buf); |
3394 | put_cq_cb: |
3395 | hl_cb_put(cb: data->cq_cb); |
3396 | put_ctx: |
3397 | hl_ctx_put(ctx); |
3398 | |
3399 | return rc; |
3400 | } |
3401 | |
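/*
 * Block until the CQ counter at the given offset reaches the target value,
 * until the timeout expires, or until the wait is interrupted. The wait node
 * is allocated here and freed before returning.
 */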
3402 | static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, |
3403 | struct wait_interrupt_data *data, |
3404 | u32 *status, u64 *timestamp) |
3405 | { |
3406 | struct hl_user_pending_interrupt *pend; |
3407 | unsigned long timeout, flags; |
3408 | long completion_rc; |
3409 | int rc = 0; |
3410 | |
3411 | timeout = hl_usecs64_to_jiffies(usecs: data->intr_timeout_us); |
3412 | |
3413 | hl_ctx_get(ctx); |
3414 | |
3415 | data->cq_cb = hl_cb_get(mmg: data->mmg, handle: data->cq_handle); |
3416 | if (!data->cq_cb) { |
3417 | rc = -EINVAL; |
3418 | goto put_ctx; |
3419 | } |
3420 | |
3421 | /* Validate the cq offset */ |
3422 | if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= |
3423 | ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { |
3424 | rc = -EINVAL; |
3425 | goto put_cq_cb; |
3426 | } |
3427 | |
	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3429 | if (!pend) { |
3430 | rc = -ENOMEM; |
3431 | goto put_cq_cb; |
3432 | } |
3433 | |
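	/* The fence here is only signaled by the interrupt handler and is not
	 * tied to any CS, hence the ULONG_MAX sequence number.
	 */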
	hl_fence_init(&pend->fence, ULONG_MAX);
	pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
	pend->cq_target_value = data->target_value;

	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);

3440 | /* We check for completion value as interrupt could have been received |
3441 | * before we add the wait node to the wait list. |
3442 | */ |
3443 | if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { |
3444 | spin_unlock_irqrestore(lock: &data->interrupt->wait_list_lock, flags); |
3445 | |
3446 | if (*pend->cq_kernel_addr >= data->target_value) |
3447 | *status = HL_WAIT_CS_STATUS_COMPLETED; |
3448 | else |
3449 | *status = HL_WAIT_CS_STATUS_BUSY; |
3450 | |
3451 | pend->fence.timestamp = ktime_get(); |
3452 | goto set_timestamp; |
3453 | } |
3454 | |
3455 | /* Add pending user interrupt to relevant list for the interrupt |
3456 | * handler to monitor. |
3457 | * Note that we cannot have sorted list by target value, |
3458 | * in order to shorten the list pass loop, since |
3459 | * same list could have nodes for different cq counter handle. |
3460 | */ |
	list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3463 | |
3464 | /* Wait for interrupt handler to signal completion */ |
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);
3467 | if (completion_rc > 0) { |
3468 | if (pend->fence.error == -EIO) { |
3469 | dev_err_ratelimited(hdev->dev, |
3470 | "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n" , |
3471 | pend->fence.error); |
3472 | rc = -EIO; |
3473 | *status = HL_WAIT_CS_STATUS_ABORTED; |
3474 | } else { |
3475 | *status = HL_WAIT_CS_STATUS_COMPLETED; |
3476 | } |
3477 | } else { |
3478 | if (completion_rc == -ERESTARTSYS) { |
3479 | dev_err_ratelimited(hdev->dev, |
3480 | "user process got signal while waiting for interrupt ID %d\n" , |
3481 | data->interrupt->interrupt_id); |
3482 | rc = -EINTR; |
3483 | *status = HL_WAIT_CS_STATUS_ABORTED; |
3484 | } else { |
3485 | /* The wait has timed-out. We don't know anything beyond that |
3486 | * because the workload was not submitted through the driver. |
3487 | * Therefore, from driver's perspective, the workload is still |
3488 | * executing. |
3489 | */ |
3490 | rc = 0; |
3491 | *status = HL_WAIT_CS_STATUS_BUSY; |
3492 | } |
3493 | } |
3494 | |
3495 | /* |
3496 | * We keep removing the node from list here, and not at the irq handler |
3497 | * for completion timeout case. and if it's a registration |
3498 | * for ts record, the node will be deleted in the irq handler after |
3499 | * we reach the target value. |
3500 | */ |
3501 | spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); |
	list_del(&pend->list_node);
	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3504 | |
3505 | set_timestamp: |
	*timestamp = ktime_to_ns(pend->fence.timestamp);
	kfree(pend);
	hl_cb_put(data->cq_cb);
3509 | hl_ctx_put(ctx); |
3510 | |
3511 | return rc; |
3512 | |
3513 | put_cq_cb: |
3514 | hl_cb_put(cb: data->cq_cb); |
3515 | put_ctx: |
3516 | hl_ctx_put(ctx); |
3517 | |
3518 | return rc; |
3519 | } |
3520 | |
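/*
 * Same flow as _hl_interrupt_wait_ioctl(), except that the completion value
 * is read from a user-space address with copy_from_user() instead of from a
 * kernel-mapped CQ counters buffer.
 */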
3521 | static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, |
3522 | u64 timeout_us, u64 user_address, |
3523 | u64 target_value, struct hl_user_interrupt *interrupt, |
3524 | u32 *status, |
3525 | u64 *timestamp) |
3526 | { |
3527 | struct hl_user_pending_interrupt *pend; |
3528 | unsigned long timeout, flags; |
3529 | u64 completion_value; |
3530 | long completion_rc; |
3531 | int rc = 0; |
3532 | |
	timeout = hl_usecs64_to_jiffies(timeout_us);
3534 | |
3535 | hl_ctx_get(ctx); |
3536 | |
	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3538 | if (!pend) { |
3539 | hl_ctx_put(ctx); |
3540 | return -ENOMEM; |
3541 | } |
3542 | |
	hl_fence_init(&pend->fence, ULONG_MAX);
3544 | |
3545 | /* Add pending user interrupt to relevant list for the interrupt |
3546 | * handler to monitor |
3547 | */ |
3548 | spin_lock_irqsave(&interrupt->wait_list_lock, flags); |
3549 | list_add_tail(new: &pend->list_node, head: &interrupt->wait_list_head); |
3550 | spin_unlock_irqrestore(lock: &interrupt->wait_list_lock, flags); |
3551 | |
3552 | /* We check for completion value as interrupt could have been received |
3553 | * before we added the node to the wait list |
3554 | */ |
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
3557 | rc = -EFAULT; |
3558 | goto remove_pending_user_interrupt; |
3559 | } |
3560 | |
3561 | if (completion_value >= target_value) { |
3562 | *status = HL_WAIT_CS_STATUS_COMPLETED; |
3563 | /* There was no interrupt, we assume the completion is now. */ |
3564 | pend->fence.timestamp = ktime_get(); |
3565 | } else { |
3566 | *status = HL_WAIT_CS_STATUS_BUSY; |
3567 | } |
3568 | |
3569 | if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) |
3570 | goto remove_pending_user_interrupt; |
3571 | |
3572 | wait_again: |
3573 | /* Wait for interrupt handler to signal completion */ |
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);
3576 | |
3577 | /* If timeout did not expire we need to perform the comparison. |
3578 | * If comparison fails, keep waiting until timeout expires |
3579 | */ |
3580 | if (completion_rc > 0) { |
3581 | spin_lock_irqsave(&interrupt->wait_list_lock, flags); |
3582 | /* reinit_completion must be called before we check for user |
3583 | * completion value, otherwise, if interrupt is received after |
3584 | * the comparison and before the next wait_for_completion, |
3585 | * we will reach timeout and fail |
3586 | */ |
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3589 | |
		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
3592 | rc = -EFAULT; |
3593 | |
3594 | goto remove_pending_user_interrupt; |
3595 | } |
3596 | |
3597 | if (completion_value >= target_value) { |
3598 | *status = HL_WAIT_CS_STATUS_COMPLETED; |
3599 | } else if (pend->fence.error) { |
3600 | dev_err_ratelimited(hdev->dev, |
3601 | "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n" , |
3602 | pend->fence.error); |
3603 | /* set the command completion status as ABORTED */ |
3604 | *status = HL_WAIT_CS_STATUS_ABORTED; |
3605 | } else { |
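			/* Target value not reached and no error was recorded,
			 * so wait again with the time that remained from the
			 * previous wait (the positive return value of
			 * wait_for_completion_interruptible_timeout()).
			 */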
3606 | timeout = completion_rc; |
3607 | goto wait_again; |
3608 | } |
3609 | } else if (completion_rc == -ERESTARTSYS) { |
3610 | dev_err_ratelimited(hdev->dev, |
3611 | "user process got signal while waiting for interrupt ID %d\n" , |
3612 | interrupt->interrupt_id); |
3613 | rc = -EINTR; |
3614 | } else { |
3615 | /* The wait has timed-out. We don't know anything beyond that |
3616 | * because the workload wasn't submitted through the driver. |
3617 | * Therefore, from driver's perspective, the workload is still |
3618 | * executing. |
3619 | */ |
3620 | rc = 0; |
3621 | *status = HL_WAIT_CS_STATUS_BUSY; |
3622 | } |
3623 | |
3624 | remove_pending_user_interrupt: |
3625 | spin_lock_irqsave(&interrupt->wait_list_lock, flags); |
	list_del(&pend->list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3628 | |
	*timestamp = ktime_to_ns(pend->fence.timestamp);
3630 | |
	kfree(pend);
3632 | hl_ctx_put(ctx); |
3633 | |
3634 | return rc; |
3635 | } |
3636 | |
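/*
 * Dispatch an interrupt-based wait: resolve the target interrupt object from
 * the ioctl flags, then run the timestamp-registration flow, the kernel CQ
 * counter wait or the user-address wait, as requested by the flags.
 */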
3637 | static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) |
3638 | { |
3639 | u16 interrupt_id, first_interrupt, last_interrupt; |
3640 | struct hl_device *hdev = hpriv->hdev; |
3641 | struct asic_fixed_properties *prop; |
3642 | struct hl_user_interrupt *interrupt; |
3643 | union hl_wait_cs_args *args = data; |
3644 | u32 status = HL_WAIT_CS_STATUS_BUSY; |
3645 | u64 timestamp = 0; |
3646 | int rc, int_idx; |
3647 | |
3648 | prop = &hdev->asic_prop; |
3649 | |
3650 | if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { |
3651 | dev_err(hdev->dev, "no user interrupts allowed" ); |
3652 | return -EPERM; |
3653 | } |
3654 | |
3655 | interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); |
3656 | |
3657 | first_interrupt = prop->first_available_user_interrupt; |
3658 | last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; |
3659 | |
3660 | if (interrupt_id < prop->user_dec_intr_count) { |
3661 | |
3662 | /* Check if the requested core is enabled */ |
3663 | if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { |
3664 | dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed" , |
3665 | interrupt_id); |
3666 | return -EINVAL; |
3667 | } |
3668 | |
3669 | interrupt = &hdev->user_interrupt[interrupt_id]; |
3670 | |
3671 | } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) { |
3672 | |
3673 | int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; |
3674 | interrupt = &hdev->user_interrupt[int_idx]; |
3675 | |
3676 | } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { |
3677 | interrupt = &hdev->common_user_cq_interrupt; |
3678 | } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { |
3679 | interrupt = &hdev->common_decoder_interrupt; |
3680 | } else { |
3681 | dev_err(hdev->dev, "invalid user interrupt %u" , interrupt_id); |
3682 | return -EINVAL; |
3683 | } |
3684 | |
3685 | if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { |
3686 | struct wait_interrupt_data wait_intr_data = {0}; |
3687 | |
3688 | wait_intr_data.interrupt = interrupt; |
3689 | wait_intr_data.mmg = &hpriv->mem_mgr; |
3690 | wait_intr_data.cq_handle = args->in.cq_counters_handle; |
3691 | wait_intr_data.cq_offset = args->in.cq_counters_offset; |
3692 | wait_intr_data.ts_handle = args->in.timestamp_handle; |
3693 | wait_intr_data.ts_offset = args->in.timestamp_offset; |
3694 | wait_intr_data.target_value = args->in.target; |
3695 | wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; |
3696 | |
3697 | if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { |
3698 | /* |
3699 | * Allow only one registration at a time. this is needed in order to prevent |
3700 | * issues while handling the flow of re-use of the same offset. |
3701 | * Since the registration flow is protected only by the interrupt lock, |
3702 | * re-use flow might request to move ts node to another interrupt list, |
3703 | * and in such case we're not protected. |
3704 | */ |
3705 | mutex_lock(&hpriv->ctx->ts_reg_lock); |
3706 | |
			rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
							&status, &timestamp);

			mutex_unlock(&hpriv->ctx->ts_reg_lock);
		} else
			rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
							&status, &timestamp);
3714 | } else { |
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt, &status,
				&timestamp);
3719 | } |
3720 | |
3721 | if (rc) |
3722 | return rc; |
3723 | |
3724 | memset(args, 0, sizeof(*args)); |
3725 | args->out.status = status; |
3726 | |
3727 | if (timestamp) { |
3728 | args->out.timestamp_nsec = timestamp; |
3729 | args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; |
3730 | } |
3731 | |
3732 | return 0; |
3733 | } |
3734 | |
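/**
 * hl_wait_ioctl() - entry point of the wait ioctl
 * @ddev: pointer to the drm device
 * @data: ioctl input/output arguments (union hl_wait_cs_args)
 * @file_priv: pointer to the drm file private data
 *
 * Dispatch according to the input flags to the interrupt-based wait, the
 * multi-CS wait or the single-CS wait handler.
 */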
3735 | int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) |
3736 | { |
3737 | struct hl_fpriv *hpriv = file_priv->driver_priv; |
3738 | struct hl_device *hdev = hpriv->hdev; |
3739 | union hl_wait_cs_args *args = data; |
3740 | u32 flags = args->in.flags; |
3741 | int rc; |
3742 | |
3743 | /* If the device is not operational, or if an error has happened and user should release the |
3744 | * device, there is no point in waiting for any command submission or user interrupt. |
3745 | */ |
	if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
3747 | return -EBUSY; |
3748 | |
3749 | if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) |
3750 | rc = hl_interrupt_wait_ioctl(hpriv, data); |
3751 | else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) |
3752 | rc = hl_multi_cs_wait_ioctl(hpriv, data); |
3753 | else |
3754 | rc = hl_cs_wait_ioctl(hpriv, data); |
3755 | |
3756 | return rc; |
3757 | } |
3758 | |