1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
2 | * |
3 | * Copyright 2016-2023 HabanaLabs, Ltd. |
4 | * All Rights Reserved. |
5 | * |
6 | */ |
7 | |
8 | #ifndef HABANALABS_H_ |
9 | #define HABANALABS_H_ |
10 | |
11 | #include <drm/drm.h> |
12 | |
/*
 * ASIC-specific constants that nevertheless form part of the ABI between the
 * kernel driver and userspace.
 */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START		0x8000	/* 32KB */
#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START	0x80	/* 128 bytes */
19 | |
/*
 * Index of the first sync object (SOB) that is not reserved:
 *   128 SOBs are reserved for collective wait
 *    16 SOBs are reserved for sync stream
 */
#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT	(128 + 16)
25 | |
/*
 * Index of the first monitor that is not reserved:
 *   64 monitors are reserved for collective wait
 *    8 monitors are reserved for sync stream
 */
#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR	(64 + 8)
31 | |
/*
 * Upper bound on the number of elements in a timestamps registration buffer.
 * This is an element count (2^20 = 1,048,576 elements), not a size in bytes.
 */
#define TS_MAX_ELEMENTS_NUM	(1 << 20)
34 | |
/*
 * Goya queue numbering
 *
 * Ordering rules:
 *  - the external queues (PCI DMA channels) MUST come before the internal
 *    queues;
 *  - each group (PCI DMA channels, internal) must be contiguous within
 *    itself;
 *  - a gap between the two groups is permitted, although not recommended.
 *
 * GOYA_QUEUE_ID_SIZE is the last enumerator and so equals the number of
 * queue IDs defined here.
 */
enum goya_queue_id {
	/* PCI DMA channels */
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1 = 1,
	GOYA_QUEUE_ID_DMA_2 = 2,
	GOYA_QUEUE_ID_DMA_3 = 3,
	GOYA_QUEUE_ID_DMA_4 = 4,

	/* CPU queue */
	GOYA_QUEUE_ID_CPU_PQ = 5,

	/* Internal queues start here */
	GOYA_QUEUE_ID_MME = 6,
	GOYA_QUEUE_ID_TPC0 = 7,
	GOYA_QUEUE_ID_TPC1 = 8,
	GOYA_QUEUE_ID_TPC2 = 9,
	GOYA_QUEUE_ID_TPC3 = 10,
	GOYA_QUEUE_ID_TPC4 = 11,
	GOYA_QUEUE_ID_TPC5 = 12,
	GOYA_QUEUE_ID_TPC6 = 13,
	GOYA_QUEUE_ID_TPC7 = 14,

	GOYA_QUEUE_ID_SIZE
};
62 | |
/*
 * Gaudi queue numbering
 *
 * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*.
 * Except one CPU queue, all the rest are internal queues.
 *
 * NOTE(review): the DMA_5_* entries below are tagged "internal", which
 * disagrees with the sentence above listing them as external - confirm which
 * of the two is correct.
 *
 * GAUDI_QUEUE_ID_SIZE is the last enumerator and so equals the number of
 * queue IDs defined here.
 */
enum gaudi_queue_id {
	/* DMA 0 - external */
	GAUDI_QUEUE_ID_DMA_0_0 = 0,
	GAUDI_QUEUE_ID_DMA_0_1 = 1,
	GAUDI_QUEUE_ID_DMA_0_2 = 2,
	GAUDI_QUEUE_ID_DMA_0_3 = 3,

	/* DMA 1 - external */
	GAUDI_QUEUE_ID_DMA_1_0 = 4,
	GAUDI_QUEUE_ID_DMA_1_1 = 5,
	GAUDI_QUEUE_ID_DMA_1_2 = 6,
	GAUDI_QUEUE_ID_DMA_1_3 = 7,

	/* CPU */
	GAUDI_QUEUE_ID_CPU_PQ = 8,

	/* DMA 2 - internal */
	GAUDI_QUEUE_ID_DMA_2_0 = 9,
	GAUDI_QUEUE_ID_DMA_2_1 = 10,
	GAUDI_QUEUE_ID_DMA_2_2 = 11,
	GAUDI_QUEUE_ID_DMA_2_3 = 12,

	/* DMA 3 - internal */
	GAUDI_QUEUE_ID_DMA_3_0 = 13,
	GAUDI_QUEUE_ID_DMA_3_1 = 14,
	GAUDI_QUEUE_ID_DMA_3_2 = 15,
	GAUDI_QUEUE_ID_DMA_3_3 = 16,

	/* DMA 4 - internal */
	GAUDI_QUEUE_ID_DMA_4_0 = 17,
	GAUDI_QUEUE_ID_DMA_4_1 = 18,
	GAUDI_QUEUE_ID_DMA_4_2 = 19,
	GAUDI_QUEUE_ID_DMA_4_3 = 20,

	/* DMA 5 - internal (see NOTE(review) above) */
	GAUDI_QUEUE_ID_DMA_5_0 = 21,
	GAUDI_QUEUE_ID_DMA_5_1 = 22,
	GAUDI_QUEUE_ID_DMA_5_2 = 23,
	GAUDI_QUEUE_ID_DMA_5_3 = 24,

	/* DMA 6 - internal */
	GAUDI_QUEUE_ID_DMA_6_0 = 25,
	GAUDI_QUEUE_ID_DMA_6_1 = 26,
	GAUDI_QUEUE_ID_DMA_6_2 = 27,
	GAUDI_QUEUE_ID_DMA_6_3 = 28,

	/* DMA 7 - internal */
	GAUDI_QUEUE_ID_DMA_7_0 = 29,
	GAUDI_QUEUE_ID_DMA_7_1 = 30,
	GAUDI_QUEUE_ID_DMA_7_2 = 31,
	GAUDI_QUEUE_ID_DMA_7_3 = 32,

	/* MME 0 - internal */
	GAUDI_QUEUE_ID_MME_0_0 = 33,
	GAUDI_QUEUE_ID_MME_0_1 = 34,
	GAUDI_QUEUE_ID_MME_0_2 = 35,
	GAUDI_QUEUE_ID_MME_0_3 = 36,

	/* MME 1 - internal */
	GAUDI_QUEUE_ID_MME_1_0 = 37,
	GAUDI_QUEUE_ID_MME_1_1 = 38,
	GAUDI_QUEUE_ID_MME_1_2 = 39,
	GAUDI_QUEUE_ID_MME_1_3 = 40,

	/* TPC 0 - internal */
	GAUDI_QUEUE_ID_TPC_0_0 = 41,
	GAUDI_QUEUE_ID_TPC_0_1 = 42,
	GAUDI_QUEUE_ID_TPC_0_2 = 43,
	GAUDI_QUEUE_ID_TPC_0_3 = 44,

	/* TPC 1 - internal */
	GAUDI_QUEUE_ID_TPC_1_0 = 45,
	GAUDI_QUEUE_ID_TPC_1_1 = 46,
	GAUDI_QUEUE_ID_TPC_1_2 = 47,
	GAUDI_QUEUE_ID_TPC_1_3 = 48,

	/* TPC 2 - internal */
	GAUDI_QUEUE_ID_TPC_2_0 = 49,
	GAUDI_QUEUE_ID_TPC_2_1 = 50,
	GAUDI_QUEUE_ID_TPC_2_2 = 51,
	GAUDI_QUEUE_ID_TPC_2_3 = 52,

	/* TPC 3 - internal */
	GAUDI_QUEUE_ID_TPC_3_0 = 53,
	GAUDI_QUEUE_ID_TPC_3_1 = 54,
	GAUDI_QUEUE_ID_TPC_3_2 = 55,
	GAUDI_QUEUE_ID_TPC_3_3 = 56,

	/* TPC 4 - internal */
	GAUDI_QUEUE_ID_TPC_4_0 = 57,
	GAUDI_QUEUE_ID_TPC_4_1 = 58,
	GAUDI_QUEUE_ID_TPC_4_2 = 59,
	GAUDI_QUEUE_ID_TPC_4_3 = 60,

	/* TPC 5 - internal */
	GAUDI_QUEUE_ID_TPC_5_0 = 61,
	GAUDI_QUEUE_ID_TPC_5_1 = 62,
	GAUDI_QUEUE_ID_TPC_5_2 = 63,
	GAUDI_QUEUE_ID_TPC_5_3 = 64,

	/* TPC 6 - internal */
	GAUDI_QUEUE_ID_TPC_6_0 = 65,
	GAUDI_QUEUE_ID_TPC_6_1 = 66,
	GAUDI_QUEUE_ID_TPC_6_2 = 67,
	GAUDI_QUEUE_ID_TPC_6_3 = 68,

	/* TPC 7 - internal */
	GAUDI_QUEUE_ID_TPC_7_0 = 69,
	GAUDI_QUEUE_ID_TPC_7_1 = 70,
	GAUDI_QUEUE_ID_TPC_7_2 = 71,
	GAUDI_QUEUE_ID_TPC_7_3 = 72,

	/* NIC 0 - internal */
	GAUDI_QUEUE_ID_NIC_0_0 = 73,
	GAUDI_QUEUE_ID_NIC_0_1 = 74,
	GAUDI_QUEUE_ID_NIC_0_2 = 75,
	GAUDI_QUEUE_ID_NIC_0_3 = 76,

	/* NIC 1 - internal */
	GAUDI_QUEUE_ID_NIC_1_0 = 77,
	GAUDI_QUEUE_ID_NIC_1_1 = 78,
	GAUDI_QUEUE_ID_NIC_1_2 = 79,
	GAUDI_QUEUE_ID_NIC_1_3 = 80,

	/* NIC 2 - internal */
	GAUDI_QUEUE_ID_NIC_2_0 = 81,
	GAUDI_QUEUE_ID_NIC_2_1 = 82,
	GAUDI_QUEUE_ID_NIC_2_2 = 83,
	GAUDI_QUEUE_ID_NIC_2_3 = 84,

	/* NIC 3 - internal */
	GAUDI_QUEUE_ID_NIC_3_0 = 85,
	GAUDI_QUEUE_ID_NIC_3_1 = 86,
	GAUDI_QUEUE_ID_NIC_3_2 = 87,
	GAUDI_QUEUE_ID_NIC_3_3 = 88,

	/* NIC 4 - internal */
	GAUDI_QUEUE_ID_NIC_4_0 = 89,
	GAUDI_QUEUE_ID_NIC_4_1 = 90,
	GAUDI_QUEUE_ID_NIC_4_2 = 91,
	GAUDI_QUEUE_ID_NIC_4_3 = 92,

	/* NIC 5 - internal */
	GAUDI_QUEUE_ID_NIC_5_0 = 93,
	GAUDI_QUEUE_ID_NIC_5_1 = 94,
	GAUDI_QUEUE_ID_NIC_5_2 = 95,
	GAUDI_QUEUE_ID_NIC_5_3 = 96,

	/* NIC 6 - internal */
	GAUDI_QUEUE_ID_NIC_6_0 = 97,
	GAUDI_QUEUE_ID_NIC_6_1 = 98,
	GAUDI_QUEUE_ID_NIC_6_2 = 99,
	GAUDI_QUEUE_ID_NIC_6_3 = 100,

	/* NIC 7 - internal */
	GAUDI_QUEUE_ID_NIC_7_0 = 101,
	GAUDI_QUEUE_ID_NIC_7_1 = 102,
	GAUDI_QUEUE_ID_NIC_7_2 = 103,
	GAUDI_QUEUE_ID_NIC_7_3 = 104,

	/* NIC 8 - internal */
	GAUDI_QUEUE_ID_NIC_8_0 = 105,
	GAUDI_QUEUE_ID_NIC_8_1 = 106,
	GAUDI_QUEUE_ID_NIC_8_2 = 107,
	GAUDI_QUEUE_ID_NIC_8_3 = 108,

	/* NIC 9 - internal */
	GAUDI_QUEUE_ID_NIC_9_0 = 109,
	GAUDI_QUEUE_ID_NIC_9_1 = 110,
	GAUDI_QUEUE_ID_NIC_9_2 = 111,
	GAUDI_QUEUE_ID_NIC_9_3 = 112,

	GAUDI_QUEUE_ID_SIZE
};
185 | |
/*
 * GAUDI2 supports two modes of operation with regard to queues:
 *
 * 1. Legacy mode - each QMAN exposes 4 streams to the user. The user sends
 *    the queue id per JOB, taken from enum gaudi2_queue_id below.
 *
 * 2. F/W mode - the F/W schedules the JOBS to the different queues. The user
 *    sends a stream id per Command Submission. The stream id is a running
 *    number from 0 up to (N-1), where N is the number of streams the F/W
 *    exposes and is passed to the user in struct hl_info_hw_ip_info.
 *
 * GAUDI2_QUEUE_ID_SIZE is the last enumerator and so equals the number of
 * queue IDs defined here.
 */
enum gaudi2_queue_id {
	/* PDMA */
	GAUDI2_QUEUE_ID_PDMA_0_0 = 0,
	GAUDI2_QUEUE_ID_PDMA_0_1 = 1,
	GAUDI2_QUEUE_ID_PDMA_0_2 = 2,
	GAUDI2_QUEUE_ID_PDMA_0_3 = 3,
	GAUDI2_QUEUE_ID_PDMA_1_0 = 4,
	GAUDI2_QUEUE_ID_PDMA_1_1 = 5,
	GAUDI2_QUEUE_ID_PDMA_1_2 = 6,
	GAUDI2_QUEUE_ID_PDMA_1_3 = 7,

	/* DCORE0 - EDMA */
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15,

	/* DCORE0 - MME */
	GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16,
	GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17,
	GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18,
	GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19,

	/* DCORE0 - TPC (TPC_0 .. TPC_6) */
	GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20,
	GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21,
	GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22,
	GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23,
	GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24,
	GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25,
	GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26,
	GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27,
	GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28,
	GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29,
	GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30,
	GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31,
	GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32,
	GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33,
	GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34,
	GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35,
	GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36,
	GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37,
	GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38,
	GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39,
	GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40,
	GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41,
	GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42,
	GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43,
	GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44,
	GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45,
	GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46,
	GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47,

	/* DCORE1 - EDMA */
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55,

	/* DCORE1 - MME */
	GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56,
	GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57,
	GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58,
	GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59,

	/* DCORE1 - TPC (TPC_0 .. TPC_5) */
	GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60,
	GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61,
	GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62,
	GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63,
	GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64,
	GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65,
	GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66,
	GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67,
	GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68,
	GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69,
	GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70,
	GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71,
	GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72,
	GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73,
	GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74,
	GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75,
	GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76,
	GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77,
	GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78,
	GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79,
	GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80,
	GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81,
	GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82,
	GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83,

	/* DCORE2 - EDMA */
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91,

	/* DCORE2 - MME */
	GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92,
	GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93,
	GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94,
	GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95,

	/* DCORE2 - TPC (TPC_0 .. TPC_5) */
	GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96,
	GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97,
	GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98,
	GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99,
	GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100,
	GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101,
	GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102,
	GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103,
	GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104,
	GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105,
	GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106,
	GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107,
	GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108,
	GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109,
	GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110,
	GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111,
	GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112,
	GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113,
	GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114,
	GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115,
	GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116,
	GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117,
	GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118,
	GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119,

	/* DCORE3 - EDMA */
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127,

	/* DCORE3 - MME */
	GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128,
	GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129,
	GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130,
	GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131,

	/* DCORE3 - TPC (TPC_0 .. TPC_5) */
	GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132,
	GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133,
	GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134,
	GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135,
	GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136,
	GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137,
	GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138,
	GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139,
	GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140,
	GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141,
	GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142,
	GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143,
	GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144,
	GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145,
	GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146,
	GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147,
	GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148,
	GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149,
	GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150,
	GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151,
	GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152,
	GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153,
	GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154,
	GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155,

	/* NIC (NIC_0 .. NIC_23) */
	GAUDI2_QUEUE_ID_NIC_0_0 = 156,
	GAUDI2_QUEUE_ID_NIC_0_1 = 157,
	GAUDI2_QUEUE_ID_NIC_0_2 = 158,
	GAUDI2_QUEUE_ID_NIC_0_3 = 159,
	GAUDI2_QUEUE_ID_NIC_1_0 = 160,
	GAUDI2_QUEUE_ID_NIC_1_1 = 161,
	GAUDI2_QUEUE_ID_NIC_1_2 = 162,
	GAUDI2_QUEUE_ID_NIC_1_3 = 163,
	GAUDI2_QUEUE_ID_NIC_2_0 = 164,
	GAUDI2_QUEUE_ID_NIC_2_1 = 165,
	GAUDI2_QUEUE_ID_NIC_2_2 = 166,
	GAUDI2_QUEUE_ID_NIC_2_3 = 167,
	GAUDI2_QUEUE_ID_NIC_3_0 = 168,
	GAUDI2_QUEUE_ID_NIC_3_1 = 169,
	GAUDI2_QUEUE_ID_NIC_3_2 = 170,
	GAUDI2_QUEUE_ID_NIC_3_3 = 171,
	GAUDI2_QUEUE_ID_NIC_4_0 = 172,
	GAUDI2_QUEUE_ID_NIC_4_1 = 173,
	GAUDI2_QUEUE_ID_NIC_4_2 = 174,
	GAUDI2_QUEUE_ID_NIC_4_3 = 175,
	GAUDI2_QUEUE_ID_NIC_5_0 = 176,
	GAUDI2_QUEUE_ID_NIC_5_1 = 177,
	GAUDI2_QUEUE_ID_NIC_5_2 = 178,
	GAUDI2_QUEUE_ID_NIC_5_3 = 179,
	GAUDI2_QUEUE_ID_NIC_6_0 = 180,
	GAUDI2_QUEUE_ID_NIC_6_1 = 181,
	GAUDI2_QUEUE_ID_NIC_6_2 = 182,
	GAUDI2_QUEUE_ID_NIC_6_3 = 183,
	GAUDI2_QUEUE_ID_NIC_7_0 = 184,
	GAUDI2_QUEUE_ID_NIC_7_1 = 185,
	GAUDI2_QUEUE_ID_NIC_7_2 = 186,
	GAUDI2_QUEUE_ID_NIC_7_3 = 187,
	GAUDI2_QUEUE_ID_NIC_8_0 = 188,
	GAUDI2_QUEUE_ID_NIC_8_1 = 189,
	GAUDI2_QUEUE_ID_NIC_8_2 = 190,
	GAUDI2_QUEUE_ID_NIC_8_3 = 191,
	GAUDI2_QUEUE_ID_NIC_9_0 = 192,
	GAUDI2_QUEUE_ID_NIC_9_1 = 193,
	GAUDI2_QUEUE_ID_NIC_9_2 = 194,
	GAUDI2_QUEUE_ID_NIC_9_3 = 195,
	GAUDI2_QUEUE_ID_NIC_10_0 = 196,
	GAUDI2_QUEUE_ID_NIC_10_1 = 197,
	GAUDI2_QUEUE_ID_NIC_10_2 = 198,
	GAUDI2_QUEUE_ID_NIC_10_3 = 199,
	GAUDI2_QUEUE_ID_NIC_11_0 = 200,
	GAUDI2_QUEUE_ID_NIC_11_1 = 201,
	GAUDI2_QUEUE_ID_NIC_11_2 = 202,
	GAUDI2_QUEUE_ID_NIC_11_3 = 203,
	GAUDI2_QUEUE_ID_NIC_12_0 = 204,
	GAUDI2_QUEUE_ID_NIC_12_1 = 205,
	GAUDI2_QUEUE_ID_NIC_12_2 = 206,
	GAUDI2_QUEUE_ID_NIC_12_3 = 207,
	GAUDI2_QUEUE_ID_NIC_13_0 = 208,
	GAUDI2_QUEUE_ID_NIC_13_1 = 209,
	GAUDI2_QUEUE_ID_NIC_13_2 = 210,
	GAUDI2_QUEUE_ID_NIC_13_3 = 211,
	GAUDI2_QUEUE_ID_NIC_14_0 = 212,
	GAUDI2_QUEUE_ID_NIC_14_1 = 213,
	GAUDI2_QUEUE_ID_NIC_14_2 = 214,
	GAUDI2_QUEUE_ID_NIC_14_3 = 215,
	GAUDI2_QUEUE_ID_NIC_15_0 = 216,
	GAUDI2_QUEUE_ID_NIC_15_1 = 217,
	GAUDI2_QUEUE_ID_NIC_15_2 = 218,
	GAUDI2_QUEUE_ID_NIC_15_3 = 219,
	GAUDI2_QUEUE_ID_NIC_16_0 = 220,
	GAUDI2_QUEUE_ID_NIC_16_1 = 221,
	GAUDI2_QUEUE_ID_NIC_16_2 = 222,
	GAUDI2_QUEUE_ID_NIC_16_3 = 223,
	GAUDI2_QUEUE_ID_NIC_17_0 = 224,
	GAUDI2_QUEUE_ID_NIC_17_1 = 225,
	GAUDI2_QUEUE_ID_NIC_17_2 = 226,
	GAUDI2_QUEUE_ID_NIC_17_3 = 227,
	GAUDI2_QUEUE_ID_NIC_18_0 = 228,
	GAUDI2_QUEUE_ID_NIC_18_1 = 229,
	GAUDI2_QUEUE_ID_NIC_18_2 = 230,
	GAUDI2_QUEUE_ID_NIC_18_3 = 231,
	GAUDI2_QUEUE_ID_NIC_19_0 = 232,
	GAUDI2_QUEUE_ID_NIC_19_1 = 233,
	GAUDI2_QUEUE_ID_NIC_19_2 = 234,
	GAUDI2_QUEUE_ID_NIC_19_3 = 235,
	GAUDI2_QUEUE_ID_NIC_20_0 = 236,
	GAUDI2_QUEUE_ID_NIC_20_1 = 237,
	GAUDI2_QUEUE_ID_NIC_20_2 = 238,
	GAUDI2_QUEUE_ID_NIC_20_3 = 239,
	GAUDI2_QUEUE_ID_NIC_21_0 = 240,
	GAUDI2_QUEUE_ID_NIC_21_1 = 241,
	GAUDI2_QUEUE_ID_NIC_21_2 = 242,
	GAUDI2_QUEUE_ID_NIC_21_3 = 243,
	GAUDI2_QUEUE_ID_NIC_22_0 = 244,
	GAUDI2_QUEUE_ID_NIC_22_1 = 245,
	GAUDI2_QUEUE_ID_NIC_22_2 = 246,
	GAUDI2_QUEUE_ID_NIC_22_3 = 247,
	GAUDI2_QUEUE_ID_NIC_23_0 = 248,
	GAUDI2_QUEUE_ID_NIC_23_1 = 249,
	GAUDI2_QUEUE_ID_NIC_23_2 = 250,
	GAUDI2_QUEUE_ID_NIC_23_3 = 251,

	/* ROT */
	GAUDI2_QUEUE_ID_ROT_0_0 = 252,
	GAUDI2_QUEUE_ID_ROT_0_1 = 253,
	GAUDI2_QUEUE_ID_ROT_0_2 = 254,
	GAUDI2_QUEUE_ID_ROT_0_3 = 255,
	GAUDI2_QUEUE_ID_ROT_1_0 = 256,
	GAUDI2_QUEUE_ID_ROT_1_1 = 257,
	GAUDI2_QUEUE_ID_ROT_1_2 = 258,
	GAUDI2_QUEUE_ID_ROT_1_3 = 259,

	/* CPU */
	GAUDI2_QUEUE_ID_CPU_PQ = 260,

	GAUDI2_QUEUE_ID_SIZE
};
464 | |
/*
 * Engine Numbering
 *
 * These IDs are used in the "busy_engines_mask" field of
 * `struct hl_info_hw_idle'.
 */

/*
 * Goya engine IDs, numbered consecutively from 0. GOYA_ENGINE_ID_SIZE is the
 * last enumerator and so equals the number of engine IDs.
 */
enum goya_engine_id {
	/* DMA */
	GOYA_ENGINE_ID_DMA_0 = 0,
	GOYA_ENGINE_ID_DMA_1 = 1,
	GOYA_ENGINE_ID_DMA_2 = 2,
	GOYA_ENGINE_ID_DMA_3 = 3,
	GOYA_ENGINE_ID_DMA_4 = 4,

	/* MME */
	GOYA_ENGINE_ID_MME_0 = 5,

	/* TPC */
	GOYA_ENGINE_ID_TPC_0 = 6,
	GOYA_ENGINE_ID_TPC_1 = 7,
	GOYA_ENGINE_ID_TPC_2 = 8,
	GOYA_ENGINE_ID_TPC_3 = 9,
	GOYA_ENGINE_ID_TPC_4 = 10,
	GOYA_ENGINE_ID_TPC_5 = 11,
	GOYA_ENGINE_ID_TPC_6 = 12,
	GOYA_ENGINE_ID_TPC_7 = 13,

	GOYA_ENGINE_ID_SIZE
};
488 | |
/*
 * Gaudi engine IDs, numbered consecutively from 0. GAUDI_ENGINE_ID_SIZE is
 * the last enumerator and so equals the number of engine IDs.
 */
enum gaudi_engine_id {
	/* DMA */
	GAUDI_ENGINE_ID_DMA_0 = 0,
	GAUDI_ENGINE_ID_DMA_1 = 1,
	GAUDI_ENGINE_ID_DMA_2 = 2,
	GAUDI_ENGINE_ID_DMA_3 = 3,
	GAUDI_ENGINE_ID_DMA_4 = 4,
	GAUDI_ENGINE_ID_DMA_5 = 5,
	GAUDI_ENGINE_ID_DMA_6 = 6,
	GAUDI_ENGINE_ID_DMA_7 = 7,

	/* MME */
	GAUDI_ENGINE_ID_MME_0 = 8,
	GAUDI_ENGINE_ID_MME_1 = 9,
	GAUDI_ENGINE_ID_MME_2 = 10,
	GAUDI_ENGINE_ID_MME_3 = 11,

	/* TPC */
	GAUDI_ENGINE_ID_TPC_0 = 12,
	GAUDI_ENGINE_ID_TPC_1 = 13,
	GAUDI_ENGINE_ID_TPC_2 = 14,
	GAUDI_ENGINE_ID_TPC_3 = 15,
	GAUDI_ENGINE_ID_TPC_4 = 16,
	GAUDI_ENGINE_ID_TPC_5 = 17,
	GAUDI_ENGINE_ID_TPC_6 = 18,
	GAUDI_ENGINE_ID_TPC_7 = 19,

	/* NIC */
	GAUDI_ENGINE_ID_NIC_0 = 20,
	GAUDI_ENGINE_ID_NIC_1 = 21,
	GAUDI_ENGINE_ID_NIC_2 = 22,
	GAUDI_ENGINE_ID_NIC_3 = 23,
	GAUDI_ENGINE_ID_NIC_4 = 24,
	GAUDI_ENGINE_ID_NIC_5 = 25,
	GAUDI_ENGINE_ID_NIC_6 = 26,
	GAUDI_ENGINE_ID_NIC_7 = 27,
	GAUDI_ENGINE_ID_NIC_8 = 28,
	GAUDI_ENGINE_ID_NIC_9 = 29,

	GAUDI_ENGINE_ID_SIZE
};
522 | |
/*
 * Gaudi2 engine IDs, numbered consecutively from 0 in declaration order.
 * Note that GAUDI2_DCORE0_ENGINE_ID_TPC_6 is numbered after all DCORE3
 * engines rather than next to the other DCORE0 TPCs.
 * GAUDI2_ENGINE_ID_SIZE is the last enumerator and so equals the number of
 * engine IDs.
 */
enum gaudi2_engine_id {
	/* DCORE0 */
	GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0,
	GAUDI2_DCORE0_ENGINE_ID_EDMA_1 = 1,
	GAUDI2_DCORE0_ENGINE_ID_MME = 2,
	GAUDI2_DCORE0_ENGINE_ID_TPC_0 = 3,
	GAUDI2_DCORE0_ENGINE_ID_TPC_1 = 4,
	GAUDI2_DCORE0_ENGINE_ID_TPC_2 = 5,
	GAUDI2_DCORE0_ENGINE_ID_TPC_3 = 6,
	GAUDI2_DCORE0_ENGINE_ID_TPC_4 = 7,
	GAUDI2_DCORE0_ENGINE_ID_TPC_5 = 8,
	GAUDI2_DCORE0_ENGINE_ID_DEC_0 = 9,
	GAUDI2_DCORE0_ENGINE_ID_DEC_1 = 10,

	/* DCORE1 */
	GAUDI2_DCORE1_ENGINE_ID_EDMA_0 = 11,
	GAUDI2_DCORE1_ENGINE_ID_EDMA_1 = 12,
	GAUDI2_DCORE1_ENGINE_ID_MME = 13,
	GAUDI2_DCORE1_ENGINE_ID_TPC_0 = 14,
	GAUDI2_DCORE1_ENGINE_ID_TPC_1 = 15,
	GAUDI2_DCORE1_ENGINE_ID_TPC_2 = 16,
	GAUDI2_DCORE1_ENGINE_ID_TPC_3 = 17,
	GAUDI2_DCORE1_ENGINE_ID_TPC_4 = 18,
	GAUDI2_DCORE1_ENGINE_ID_TPC_5 = 19,
	GAUDI2_DCORE1_ENGINE_ID_DEC_0 = 20,
	GAUDI2_DCORE1_ENGINE_ID_DEC_1 = 21,

	/* DCORE2 */
	GAUDI2_DCORE2_ENGINE_ID_EDMA_0 = 22,
	GAUDI2_DCORE2_ENGINE_ID_EDMA_1 = 23,
	GAUDI2_DCORE2_ENGINE_ID_MME = 24,
	GAUDI2_DCORE2_ENGINE_ID_TPC_0 = 25,
	GAUDI2_DCORE2_ENGINE_ID_TPC_1 = 26,
	GAUDI2_DCORE2_ENGINE_ID_TPC_2 = 27,
	GAUDI2_DCORE2_ENGINE_ID_TPC_3 = 28,
	GAUDI2_DCORE2_ENGINE_ID_TPC_4 = 29,
	GAUDI2_DCORE2_ENGINE_ID_TPC_5 = 30,
	GAUDI2_DCORE2_ENGINE_ID_DEC_0 = 31,
	GAUDI2_DCORE2_ENGINE_ID_DEC_1 = 32,

	/* DCORE3 */
	GAUDI2_DCORE3_ENGINE_ID_EDMA_0 = 33,
	GAUDI2_DCORE3_ENGINE_ID_EDMA_1 = 34,
	GAUDI2_DCORE3_ENGINE_ID_MME = 35,
	GAUDI2_DCORE3_ENGINE_ID_TPC_0 = 36,
	GAUDI2_DCORE3_ENGINE_ID_TPC_1 = 37,
	GAUDI2_DCORE3_ENGINE_ID_TPC_2 = 38,
	GAUDI2_DCORE3_ENGINE_ID_TPC_3 = 39,
	GAUDI2_DCORE3_ENGINE_ID_TPC_4 = 40,
	GAUDI2_DCORE3_ENGINE_ID_TPC_5 = 41,
	GAUDI2_DCORE3_ENGINE_ID_DEC_0 = 42,
	GAUDI2_DCORE3_ENGINE_ID_DEC_1 = 43,

	/* DCORE0 TPC_6 - numbered after the per-dcore groups */
	GAUDI2_DCORE0_ENGINE_ID_TPC_6 = 44,

	/* PDMA */
	GAUDI2_ENGINE_ID_PDMA_0 = 45,
	GAUDI2_ENGINE_ID_PDMA_1 = 46,

	/* ROT */
	GAUDI2_ENGINE_ID_ROT_0 = 47,
	GAUDI2_ENGINE_ID_ROT_1 = 48,

	/* PCIE DEC */
	GAUDI2_PCIE_ENGINE_ID_DEC_0 = 49,
	GAUDI2_PCIE_ENGINE_ID_DEC_1 = 50,

	/* NIC */
	GAUDI2_ENGINE_ID_NIC0_0 = 51,
	GAUDI2_ENGINE_ID_NIC0_1 = 52,
	GAUDI2_ENGINE_ID_NIC1_0 = 53,
	GAUDI2_ENGINE_ID_NIC1_1 = 54,
	GAUDI2_ENGINE_ID_NIC2_0 = 55,
	GAUDI2_ENGINE_ID_NIC2_1 = 56,
	GAUDI2_ENGINE_ID_NIC3_0 = 57,
	GAUDI2_ENGINE_ID_NIC3_1 = 58,
	GAUDI2_ENGINE_ID_NIC4_0 = 59,
	GAUDI2_ENGINE_ID_NIC4_1 = 60,
	GAUDI2_ENGINE_ID_NIC5_0 = 61,
	GAUDI2_ENGINE_ID_NIC5_1 = 62,
	GAUDI2_ENGINE_ID_NIC6_0 = 63,
	GAUDI2_ENGINE_ID_NIC6_1 = 64,
	GAUDI2_ENGINE_ID_NIC7_0 = 65,
	GAUDI2_ENGINE_ID_NIC7_1 = 66,
	GAUDI2_ENGINE_ID_NIC8_0 = 67,
	GAUDI2_ENGINE_ID_NIC8_1 = 68,
	GAUDI2_ENGINE_ID_NIC9_0 = 69,
	GAUDI2_ENGINE_ID_NIC9_1 = 70,
	GAUDI2_ENGINE_ID_NIC10_0 = 71,
	GAUDI2_ENGINE_ID_NIC10_1 = 72,
	GAUDI2_ENGINE_ID_NIC11_0 = 73,
	GAUDI2_ENGINE_ID_NIC11_1 = 74,

	/* Remaining engines */
	GAUDI2_ENGINE_ID_PCIE = 75,
	GAUDI2_ENGINE_ID_PSOC = 76,
	GAUDI2_ENGINE_ID_ARC_FARM = 77,
	GAUDI2_ENGINE_ID_KDMA = 78,

	GAUDI2_ENGINE_ID_SIZE
};
605 | |
/*
 * ASIC specific PLL index
 *
 * Used to retrieve the frequency info of different IPs via
 * HL_INFO_PLL_FREQUENCY under the DRM_IOCTL_HL_INFO IOCTL.
 * The enum values are to be used as an index in struct hl_pll_frequency_info.
 */

/*
 * Goya PLL indices. HL_GOYA_PLL_MAX is the last enumerator and so equals the
 * number of PLL indices.
 */
enum hl_goya_pll_index {
	HL_GOYA_CPU_PLL = 0,
	HL_GOYA_IC_PLL = 1,
	HL_GOYA_MC_PLL = 2,
	HL_GOYA_MME_PLL = 3,
	HL_GOYA_PCI_PLL = 4,
	HL_GOYA_EMMC_PLL = 5,
	HL_GOYA_TPC_PLL = 6,

	HL_GOYA_PLL_MAX
};
624 | |
/*
 * Gaudi PLL indices. HL_GAUDI_PLL_MAX is the last enumerator and so equals
 * the number of PLL indices.
 */
enum hl_gaudi_pll_index {
	HL_GAUDI_CPU_PLL = 0,
	HL_GAUDI_PCI_PLL = 1,
	HL_GAUDI_SRAM_PLL = 2,
	HL_GAUDI_HBM_PLL = 3,
	HL_GAUDI_NIC_PLL = 4,
	HL_GAUDI_DMA_PLL = 5,
	HL_GAUDI_MESH_PLL = 6,
	HL_GAUDI_MME_PLL = 7,
	HL_GAUDI_TPC_PLL = 8,
	HL_GAUDI_IF_PLL = 9,

	HL_GAUDI_PLL_MAX
};
638 | |
/*
 * Gaudi2 PLL indices. HL_GAUDI2_PLL_MAX is the last enumerator and so equals
 * the number of PLL indices.
 */
enum hl_gaudi2_pll_index {
	HL_GAUDI2_CPU_PLL = 0,
	HL_GAUDI2_PCI_PLL = 1,
	HL_GAUDI2_SRAM_PLL = 2,
	HL_GAUDI2_HBM_PLL = 3,
	HL_GAUDI2_NIC_PLL = 4,
	HL_GAUDI2_DMA_PLL = 5,
	HL_GAUDI2_MESH_PLL = 6,
	HL_GAUDI2_MME_PLL = 7,
	HL_GAUDI2_TPC_PLL = 8,
	HL_GAUDI2_IF_PLL = 9,
	HL_GAUDI2_VID_PLL = 10,
	HL_GAUDI2_MSS_PLL = 11,

	HL_GAUDI2_PLL_MAX
};
654 | |
/**
 * enum hl_goya_dma_direction - Direction of the DMA operation inside a LIN_DMA
 *                              packet that is submitted to the GOYA's DMA QMAN.
 *                              The H/W does not use this attribute; the kernel
 *                              driver uses it to parse the packet's addresses
 *                              and patch/validate them.
 * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR.
 * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM.
 * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM.
 * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR.
 * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory.
 * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory.
 * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR.
 * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM.
 * @HL_DMA_ENUM_MAX: number of values in enum
 */
enum hl_goya_dma_direction {
	HL_DMA_HOST_TO_DRAM = 0,
	HL_DMA_HOST_TO_SRAM = 1,
	HL_DMA_DRAM_TO_SRAM = 2,
	HL_DMA_SRAM_TO_DRAM = 3,
	HL_DMA_SRAM_TO_HOST = 4,
	HL_DMA_DRAM_TO_HOST = 5,
	HL_DMA_DRAM_TO_DRAM = 6,
	HL_DMA_SRAM_TO_SRAM = 7,

	HL_DMA_ENUM_MAX
};
681 | |
/**
 * enum hl_device_status - Device status information.
 * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational.
 * @HL_DEVICE_STATUS_IN_RESET: Device is currently in reset.
 * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable.
 * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was
 *                                disabled.
 * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation
 *                                       is still in progress.
 * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently in a
 *                                                  reset that was triggered
 *                                                  because the user released
 *                                                  the device.
 * @HL_DEVICE_STATUS_LAST: Last status; an alias of the highest-numbered status
 *                         above, not a separate value.
 */
enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL = 0,
	HL_DEVICE_STATUS_IN_RESET = 1,
	HL_DEVICE_STATUS_MALFUNCTION = 2,
	HL_DEVICE_STATUS_NEEDS_RESET = 3,
	HL_DEVICE_STATUS_IN_DEVICE_CREATION = 4,
	HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE = 5,

	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE
};
703 | |
/*
 * Server type identifiers.
 *
 * Every value is assigned explicitly. Value 6 is not assigned to any
 * enumerator here.
 * NOTE(review): presumably 6 is skipped on purpose - confirm before reusing it.
 */
enum hl_server_type {
	HL_SERVER_TYPE_UNKNOWN = 0,

	/* Gaudi */
	HL_SERVER_GAUDI_HLS1 = 1,
	HL_SERVER_GAUDI_HLS1H = 2,
	HL_SERVER_GAUDI_TYPE1 = 3,
	HL_SERVER_GAUDI_TYPE2 = 4,

	/* Gaudi2 */
	HL_SERVER_GAUDI2_HLS2 = 5,
	HL_SERVER_GAUDI2_TYPE1 = 7
};
713 | |
/*
 * Notifier event values - for the notification mechanism and the
 * HL_INFO_GET_EVENTS command. Each event is a distinct single bit.
 *
 * HL_NOTIFIER_EVENT_TPC_ASSERT		- Indicates TPC assert event
 * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE	- Indicates undefined operation code
 * HL_NOTIFIER_EVENT_DEVICE_RESET	- Indicates device requires a reset
 * HL_NOTIFIER_EVENT_CS_TIMEOUT		- Indicates CS timeout error
 * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE	- Indicates device is unavailable
 * HL_NOTIFIER_EVENT_USER_ENGINE_ERR	- Indicates device engine in error state
 * HL_NOTIFIER_EVENT_GENERAL_HW_ERR	- Indicates device HW error
 * HL_NOTIFIER_EVENT_RAZWI		- Indicates razwi happened
 * HL_NOTIFIER_EVENT_PAGE_FAULT		- Indicates page fault happened
 * HL_NOTIFIER_EVENT_CRITICAL_HW_ERR	- Indicates a HW error that requires SW
 *					  abort and HW reset
 * HL_NOTIFIER_EVENT_CRITICAL_FW_ERR	- Indicates a FW error that requires SW
 *					  abort and HW reset
 */
#define HL_NOTIFIER_EVENT_TPC_ASSERT		(1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE	(1ULL << 1)
#define HL_NOTIFIER_EVENT_DEVICE_RESET		(1ULL << 2)
#define HL_NOTIFIER_EVENT_CS_TIMEOUT		(1ULL << 3)
#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE	(1ULL << 4)
#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR	(1ULL << 5)
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR	(1ULL << 6)
#define HL_NOTIFIER_EVENT_RAZWI			(1ULL << 7)
#define HL_NOTIFIER_EVENT_PAGE_FAULT		(1ULL << 8)
#define HL_NOTIFIER_EVENT_CRITICAL_HW_ERR	(1ULL << 9)
#define HL_NOTIFIER_EVENT_CRITICAL_FW_ERR	(1ULL << 10)

/*
 * Misspelled ("CRITICL") names for the two critical-error events. They are
 * kept as aliases of the correctly spelled macros so that code written
 * against the misspelled names keeps compiling with the same values.
 */
#define HL_NOTIFIER_EVENT_CRITICL_HW_ERR	HL_NOTIFIER_EVENT_CRITICAL_HW_ERR
#define HL_NOTIFIER_EVENT_CRITICL_FW_ERR	HL_NOTIFIER_EVENT_CRITICAL_FW_ERR
742 | |
743 | /* Opcode for management ioctl |
744 | * |
 * HL_INFO_HW_IP_INFO     - Receive information about different IP blocks in the
 *                          device.
747 | * HL_INFO_HW_EVENTS - Receive an array describing how many times each event |
748 | * occurred since the last hard reset. |
749 | * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the |
750 | * specific context. This is relevant only for devices |
751 | * where the dram is managed by the kernel driver |
752 | * HL_INFO_HW_IDLE - Retrieve information about the idle status of each |
753 | * internal engine. |
754 | * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't |
755 | * require an open context. |
 * HL_INFO_DEVICE_UTILIZATION  - Retrieve the total utilization of the device
 *                               over the last period specified by the user.
 *                               The period can be between 100ms and 1s, in
 *                               resolution of 100ms. The return value is a
 *                               percentage of the utilization rate.
761 | * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each |
762 | * event occurred since the driver was loaded. |
763 | * HL_INFO_CLK_RATE - Retrieve the current and maximum clock rate |
764 | * of the device in MHz. The maximum clock rate is |
765 | * configurable via sysfs parameter |
766 | * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset |
767 | * operations performed on the device since the last |
768 | * time the driver was loaded. |
769 | * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time |
770 | * for synchronization. |
771 | * HL_INFO_CS_COUNTERS - Retrieve command submission counters |
772 | * HL_INFO_PCI_COUNTERS - Retrieve PCI counters |
773 | * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason |
774 | * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore |
775 | * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption |
776 | * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency |
777 | * HL_INFO_POWER - Retrieve power information |
778 | * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls |
779 | * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info |
780 | * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num |
781 | * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened |
782 | * and CS timeout or razwi error occurred. |
783 | * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. |
784 | * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: |
785 | * Timestamp of razwi. |
786 | * The address which accessing it caused the razwi. |
787 | * Razwi initiator. |
788 | * Razwi cause, was it a page fault or MMU access error. |
789 | * May return 0 even though no new data is available, in that case |
790 | * timestamp will be 0. |
791 | * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation |
792 | * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. |
793 | * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. |
794 | * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd |
795 | * HL_INFO_GET_EVENTS - Retrieve the last occurred events |
796 | * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. |
797 | * May return 0 even though no new data is available, in that case |
798 | * timestamp will be 0. |
799 | * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. |
800 | * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. |
801 | * May return 0 even though no new data is available, in that case |
802 | * timestamp will be 0. |
803 | * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. |
804 | * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. |
805 | * HL_INFO_HW_ERR_EVENT - Retrieve information on the reported HW error. |
806 | * May return 0 even though no new data is available, in that case |
807 | * timestamp will be 0. |
808 | * HL_INFO_FW_ERR_EVENT - Retrieve information on the reported FW error. |
809 | * May return 0 even though no new data is available, in that case |
810 | * timestamp will be 0. |
811 | * HL_INFO_USER_ENGINE_ERR_EVENT - Retrieve the last engine id that reported an error. |
812 | */ |
813 | #define HL_INFO_HW_IP_INFO 0 |
814 | #define HL_INFO_HW_EVENTS 1 |
815 | #define HL_INFO_DRAM_USAGE 2 |
816 | #define HL_INFO_HW_IDLE 3 |
817 | #define HL_INFO_DEVICE_STATUS 4 |
818 | #define HL_INFO_DEVICE_UTILIZATION 6 |
819 | #define HL_INFO_HW_EVENTS_AGGREGATE 7 |
820 | #define HL_INFO_CLK_RATE 8 |
821 | #define HL_INFO_RESET_COUNT 9 |
822 | #define HL_INFO_TIME_SYNC 10 |
823 | #define HL_INFO_CS_COUNTERS 11 |
824 | #define HL_INFO_PCI_COUNTERS 12 |
825 | #define HL_INFO_CLK_THROTTLE_REASON 13 |
826 | #define HL_INFO_SYNC_MANAGER 14 |
827 | #define HL_INFO_TOTAL_ENERGY 15 |
828 | #define HL_INFO_PLL_FREQUENCY 16 |
829 | #define HL_INFO_POWER 17 |
830 | #define HL_INFO_OPEN_STATS 18 |
831 | #define HL_INFO_DRAM_REPLACED_ROWS 21 |
832 | #define HL_INFO_DRAM_PENDING_ROWS 22 |
833 | #define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 |
834 | #define HL_INFO_CS_TIMEOUT_EVENT 24 |
835 | #define HL_INFO_RAZWI_EVENT 25 |
836 | #define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 |
837 | #define HL_INFO_SECURED_ATTESTATION 27 |
838 | #define HL_INFO_REGISTER_EVENTFD 28 |
839 | #define HL_INFO_UNREGISTER_EVENTFD 29 |
840 | #define HL_INFO_GET_EVENTS 30 |
841 | #define HL_INFO_UNDEFINED_OPCODE_EVENT 31 |
842 | #define HL_INFO_ENGINE_STATUS 32 |
843 | #define HL_INFO_PAGE_FAULT_EVENT 33 |
844 | #define HL_INFO_USER_MAPPINGS 34 |
845 | #define HL_INFO_FW_GENERIC_REQ 35 |
846 | #define HL_INFO_HW_ERR_EVENT 36 |
847 | #define HL_INFO_FW_ERR_EVENT 37 |
848 | #define HL_INFO_USER_ENGINE_ERR_EVENT 38 |
849 | |
/* Sizes, in bytes, of the f/w version and card name buffers in hl_info_hw_ip_info */
#define HL_INFO_VERSION_MAX_LEN		128
#define HL_INFO_CARD_NAME_MAX_LEN	16

/*
 * Maximum buffer size for retrieving engines status (1MB).
 * Written as a plain shift because SZ_1M is a kernel-internal macro that is
 * not available to userspace code including this header.
 */
#define HL_ENGINES_DATA_MAX_SIZE	(1 << 20)

/**
 * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
 * @sram_base_address: The first SRAM physical base address that is free to be
 *                     used by the user.
 * @dram_base_address: The first DRAM virtual or physical base address that is
 *                     free to be used by the user.
 * @dram_size: The DRAM size that is available to the user.
 * @sram_size: The SRAM size that is available to the user.
 * @num_of_events: The number of events that can be received from the f/w. This
 *                 is needed so the user can know what is the size of the h/w
 *                 events array he needs to pass to the kernel when he wants to
 *                 fetch the event counters.
 * @device_id: PCI device ID of the ASIC.
 * @module_id: Module ID of the ASIC for mezzanine cards in servers
 *             (From OCP spec).
 * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled.
 * @first_available_interrupt_id: The first available interrupt ID for the user
 *                                to be used when it works with user interrupts.
 *                                Relevant for Gaudi2 and later.
 * @server_type: Server type that the Gaudi ASIC is currently installed in.
 *               The value is according to enum hl_server_type
 * @cpld_version: CPLD version on the board.
 * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
 *                           in some ASICs.
 * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
 *                    for Goya/Gaudi only.
 * @dram_enabled: Whether the DRAM is enabled.
 * @security_enabled: Whether security is enabled on device.
 * @mme_master_slave_mode: Indicate whether the MME is working in master/slave
 *                         configuration. Relevant for Gaudi2 and later.
 * @cpucp_version: The CPUCP f/w version.
 * @card_name: The card name as passed by the f/w.
 * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled.
 *                        Relevant for Gaudi2 and later.
 * @dram_page_size: The DRAM physical page size.
 * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled.
 *                     Relevant for Gaudi2 and later.
 * @number_of_user_interrupts: The number of interrupts that are available to the userspace
 *                             application to use. Relevant for Gaudi2 and later.
 * @reserved1: Reserved.
 * @reserved2: Reserved.
 * @reserved3: Reserved.
 * @device_mem_alloc_default_page_size: default page size used in device memory allocation.
 * @reserved4: Reserved.
 * @reserved5: Reserved.
 * @reserved6: Reserved.
 * @reserved7: Reserved.
 * @revision_id: PCI revision ID of the ASIC.
 * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
 * @rotator_enabled_mask: Bit-mask that represents which rotators are enabled.
 *                        Relevant for Gaudi3 and later.
 * @reserved9: Reserved.
 * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
 *                                  in order to raise events toward FW.
 * @reserved_dram_size: DRAM size reserved for driver and firmware.
 */
struct hl_info_hw_ip_info {
	__u64 sram_base_address;
	__u64 dram_base_address;
	__u64 dram_size;
	__u32 sram_size;
	__u32 num_of_events;
	__u32 device_id;
	__u32 module_id;
	__u32 decoder_enabled_mask;
	__u16 first_available_interrupt_id;
	__u16 server_type;
	__u32 cpld_version;
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;
	__u32 psoc_pci_pll_od;
	__u32 psoc_pci_pll_div_factor;
	__u8 tpc_enabled_mask;
	__u8 dram_enabled;
	__u8 security_enabled;
	__u8 mme_master_slave_mode;
	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
	__u64 tpc_enabled_mask_ext;
	__u64 dram_page_size;
	__u32 edma_enabled_mask;
	__u16 number_of_user_interrupts;
	__u8 reserved1;
	__u8 reserved2;
	__u64 reserved3;
	__u64 device_mem_alloc_default_page_size;
	__u64 reserved4;
	__u64 reserved5;
	__u32 reserved6;
	__u8 reserved7;
	__u8 revision_id;
	__u16 tpc_interrupt_id;
	__u32 rotator_enabled_mask;
	__u32 reserved9;
	__u64 engine_core_interrupt_reg_addr;
	__u64 reserved_dram_size;
};
948 | |
/**
 * struct hl_info_dram_usage - DRAM usage information
 * @dram_free_mem: free DRAM memory. NOTE(review): units are not stated in this
 *                 header - confirm against the driver.
 * @ctx_dram_mem: DRAM memory of the context. NOTE(review): presumably the
 *                amount used by the calling context - confirm.
 */
struct hl_info_dram_usage {
	__u64	dram_free_mem;
	__u64	ctx_dram_mem;
};
953 | |
/* Number of 64-bit words in hl_info_hw_idle.busy_engines_mask_ext */
#define HL_BUSY_ENGINES_MASK_EXT_SIZE	4

/**
 * struct hl_info_hw_idle - idle status of the device engines
 * @is_idle: idle indication. NOTE(review): the exact encoding is not stated in
 *           this header - confirm against the driver.
 * @busy_engines_mask: Bitmask of busy engines.
 *                     Bits definition is according to `enum <chip>_engine_id'.
 * @busy_engines_mask_ext: Extended Bitmask of busy engines.
 *                         Bits definition is according to
 *                         `enum <chip>_engine_id'.
 */
struct hl_info_hw_idle {
	__u32	is_idle;
	__u32	busy_engines_mask;
	__u64	busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE];
};
970 | |
/**
 * struct hl_info_device_status - device status information
 * @status: the device's status. NOTE(review): the set of valid values is not
 *          visible in this part of the header - confirm.
 * @pad: padding; brings the structure size to 8 bytes.
 */
struct hl_info_device_status {
	__u32	status;
	__u32	pad;
};
975 | |
/**
 * struct hl_info_device_utilization - device utilization information
 * @utilization: utilization of the device. NOTE(review): presumably the
 *               percentage described for HL_INFO_DEVICE_UTILIZATION - confirm.
 * @pad: padding; brings the structure size to 8 bytes.
 */
struct hl_info_device_utilization {
	__u32	utilization;
	__u32	pad;
};
980 | |
/**
 * struct hl_info_clk_rate - clock rate information
 * @cur_clk_rate_mhz: current clock rate, in MHz.
 * @max_clk_rate_mhz: maximum clock rate, in MHz.
 */
struct hl_info_clk_rate {
	__u32	cur_clk_rate_mhz;
	__u32	max_clk_rate_mhz;
};
985 | |
/**
 * struct hl_info_reset_count - reset counters
 * @hard_reset_cnt: count of hard reset operations.
 * @soft_reset_cnt: count of soft reset operations.
 */
struct hl_info_reset_count {
	__u32	hard_reset_cnt;
	__u32	soft_reset_cnt;
};
990 | |
/**
 * struct hl_info_time_sync - time synchronization information
 * @device_time: the device's time.
 * @host_time: the host's time.
 * @tsc_time: NOTE(review): presumably a TSC based host timestamp - confirm.
 *
 * NOTE(review): the time units are not stated in this header.
 */
struct hl_info_time_sync {
	__u64	device_time;
	__u64	host_time;
	__u64	tsc_time;
};
996 | |
/**
 * struct hl_info_pci_counters - counters of the PCI link
 * @rx_throughput: PCI receive throughput, in KBps
 * @tx_throughput: PCI transmit throughput, in KBps
 * @replay_cnt: PCI replay counter
 */
struct hl_info_pci_counters {
	__u64	rx_throughput;
	__u64	tx_throughput;
	__u64	replay_cnt;
};
1008 | |
/*
 * Clock throttling types. Each value is used both as the bit position of the
 * matching HL_CLK_THROTTLE_* mask and as the index into the per-type arrays
 * of struct hl_info_clk_throttle.
 */
enum hl_clk_throttling_type {
	HL_CLK_THROTTLE_TYPE_POWER = 0,
	HL_CLK_THROTTLE_TYPE_THERMAL,
	HL_CLK_THROTTLE_TYPE_MAX
};

/* clk_throttling_reason masks */
#define HL_CLK_THROTTLE_POWER		(1 << HL_CLK_THROTTLE_TYPE_POWER)
#define HL_CLK_THROTTLE_THERMAL		(1 << HL_CLK_THROTTLE_TYPE_THERMAL)

/**
 * struct hl_info_clk_throttle - clock throttling reason
 * @clk_throttling_reason: each bit represents a clk throttling reason
 * @pad: padding; keeps the 64-bit arrays that follow 8-byte aligned.
 * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event
 * @clk_throttling_duration_ns: the clock throttle time in nanosec
 */
struct hl_info_clk_throttle {
	__u32	clk_throttling_reason;
	__u32	pad;
	__u64	clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX];
	__u64	clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX];
};
1031 | |
/**
 * struct hl_info_energy - energy information of the device
 * @total_energy_consumption: total energy consumed by the device.
 *                            NOTE(review): units are not stated in this header.
 */
struct hl_info_energy {
	__u64	total_energy_consumption;
};
1039 | |
/* Number of entries in hl_pll_frequency_info.output */
#define HL_PLL_NUM_OUTPUTS	4

/**
 * struct hl_pll_frequency_info - PLL frequency information
 * @output: one value per PLL output. NOTE(review): units are not stated in
 *          this header - confirm against the driver.
 */
struct hl_pll_frequency_info {
	__u16	output[HL_PLL_NUM_OUTPUTS];
};
1045 | |
/**
 * struct hl_open_stats_info - statistics about opening the device
 * @open_counter: ever growing counter, increased on each successful dev open
 * @last_open_period_ms: duration, in milliseconds, the device was open last time
 * @is_compute_ctx_active: Whether there is an active compute context executing
 * @compute_ctx_in_release: true if the current compute context is being released
 * @pad: padding; brings the structure size to a multiple of 8 bytes.
 */
struct hl_open_stats_info {
	__u64	open_counter;
	__u64	last_open_period_ms;
	__u8	is_compute_ctx_active;
	__u8	compute_ctx_in_release;
	__u8	pad[6];
};
1060 | |
/**
 * struct hl_power_info - power information of the device
 * @power: power consumption. NOTE(review): units are not stated in this header.
 */
struct hl_power_info {
	__u64	power;
};
1068 | |
/**
 * struct hl_info_sync_manager - information about the sync manager
 * @first_available_sync_object: first available sob
 * @first_available_monitor: first available monitor
 * @first_available_cq: first available cq
 * @reserved: reserved field; brings the structure size to 16 bytes.
 */
struct hl_info_sync_manager {
	__u32	first_available_sync_object;
	__u32	first_available_monitor;
	__u32	first_available_cq;
	__u32	reserved;
};
1081 | |
/**
 * struct hl_info_cs_counters - counters of dropped command submissions
 *
 * Every drop reason has two counters: a "total" one and a "ctx" (context) one.
 *
 * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue
 * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue
 * @total_parsing_drop_cnt: total dropped due to error in packet parsing
 * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing
 * @total_queue_full_drop_cnt: total dropped due to queue full
 * @ctx_queue_full_drop_cnt: context dropped due to queue full
 * @total_device_in_reset_drop_cnt: total dropped due to device in reset
 * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
 * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
 * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
 * @total_validation_drop_cnt: total dropped due to validation error
 * @ctx_validation_drop_cnt: context dropped due to validation error
 */
struct hl_info_cs_counters {
	__u64	total_out_of_mem_drop_cnt;
	__u64	ctx_out_of_mem_drop_cnt;
	__u64	total_parsing_drop_cnt;
	__u64	ctx_parsing_drop_cnt;
	__u64	total_queue_full_drop_cnt;
	__u64	ctx_queue_full_drop_cnt;
	__u64	total_device_in_reset_drop_cnt;
	__u64	ctx_device_in_reset_drop_cnt;
	__u64	total_max_cs_in_flight_drop_cnt;
	__u64	ctx_max_cs_in_flight_drop_cnt;
	__u64	total_validation_drop_cnt;
	__u64	ctx_validation_drop_cnt;
};
1111 | |
/**
 * struct hl_info_last_err_open_dev_time - last error boot information.
 * @timestamp: timestamp of the last time the device was opened and an error
 *             occurred.
 */
struct hl_info_last_err_open_dev_time {
	__s64	timestamp;
};
1119 | |
/**
 * struct hl_info_cs_timeout_event - information about the last CS timeout.
 * @timestamp: timestamp of the last CS timeout event.
 * @seq: sequence number of the last CS timeout event.
 */
struct hl_info_cs_timeout_event {
	__s64	timestamp;
	__u64	seq;
};
1129 | |
/*
 * The razwi constants are written with plain literals and shifts. U16_MAX and
 * BIT() are kernel-internal helpers, so userspace code including this header
 * could not expand them. Values and types are unchanged (0xFFFF is an int,
 * the flag bits are unsigned long).
 */
#define HL_RAZWI_NA_ENG_ID			0xFFFF
#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR	128
#define HL_RAZWI_READ				(1UL << 0)
#define HL_RAZWI_WRITE				(1UL << 1)
#define HL_RAZWI_LBW				(1UL << 2)
#define HL_RAZWI_HBW				(1UL << 3)
#define HL_RAZWI_RR				(1UL << 4)
#define HL_RAZWI_ADDR_DEC			(1UL << 5)

/**
 * struct hl_info_razwi_event - razwi information.
 * @timestamp: timestamp of razwi.
 * @addr: address which accessing it caused razwi.
 * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not
 *             have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible
 *             engines which caused the razwi, it will hold all of them.
 * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication
 *                           might be common for several engines and there is no way to get the
 *                           exact engine. In this case, engine_id array will be filled with all
 *                           possible engines that caused this razwi. Also, there might be a
 *                           possibility in gaudi, where we don't have indication on a specific
 *                           engine, in that case the value of this parameter will be zero.
 * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation
 *                                      HL_RAZWI_WRITE - razwi caused by write operation
 *                                      HL_RAZWI_LBW - razwi caused by lbw fabric transaction
 *                                      HL_RAZWI_HBW - razwi caused by hbw fabric transaction
 *                                      HL_RAZWI_RR - razwi caused by range register
 *                                      HL_RAZWI_ADDR_DEC - razwi caused by address decode error
 *         Note: this data is not supported by all asics, in that case the relevant bits will not
 *               be set.
 * @pad: padding; brings the structure size to a multiple of 8 bytes.
 */
struct hl_info_razwi_event {
	__s64 timestamp;
	__u64 addr;
	__u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR];
	__u16 num_of_possible_engines;
	__u8 flags;
	__u8 pad[5];
};
1169 | |
/* Dimensions of hl_info_undefined_opcode_event.cb_addr_streams */
#define MAX_QMAN_STREAMS_INFO		4
#define OPCODE_INFO_MAX_ADDR_SIZE	8

/**
 * struct hl_info_undefined_opcode_event - info about last undefined opcode error
 * @timestamp: timestamp of the undefined opcode error
 * @cb_addr_streams: CB addresses (per stream) that currently exist in the PQ
 *                   entries. In case all streams array entries are
 *                   filled with values, it means the execution was in Lower-CP.
 * @cq_addr: the address of the current handled command buffer
 * @cq_size: the size of the current handled command buffer
 * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array.
 *                       should be equal to 1 in case of undefined opcode
 *                       in Upper-CP (specific stream) and equal to 4 in case
 *                       of undefined opcode in Lower-CP.
 * @engine_id: engine-id that the error occurred on
 * @stream_id: the stream id the error occurred on. In case the stream equals to
 *             MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP.
 */
struct hl_info_undefined_opcode_event {
	__s64	timestamp;
	__u64	cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE];
	__u64	cq_addr;
	__u32	cq_size;
	__u32	cb_addr_streams_len;
	__u32	engine_id;
	__u32	stream_id;
};
1197 | |
/**
 * struct hl_info_hw_err_event - information about a HW error
 * @timestamp: timestamp of the error occurrence
 * @event_id: The async event ID (specific to each device type).
 * @pad: padding that rounds the structure size up to u64 granularity.
 */
struct hl_info_hw_err_event {
	__s64	timestamp;
	__u16	event_id;
	__u16	pad[3];
};
1209 | |
/*
 * FW error types, as stored in struct hl_info_fw_err_event (see its err_type
 * field documentation).
 */
enum hl_info_fw_err_type {
	HL_INFO_FW_HEARTBEAT_ERR = 0,
	HL_INFO_FW_REPORTED_ERR = 1,
};
1215 | |
/**
 * struct hl_info_fw_err_event - information about a FW error
 * @timestamp: time-stamp of the error occurrence
 * @err_type: The type of event as defined in hl_info_fw_err_type.
 * @event_id: The async event ID (specific to each device type, applicable only
 *            when event type is HL_INFO_FW_REPORTED_ERR).
 * @pad: padding that rounds the structure size up to u64 granularity.
 */
struct hl_info_fw_err_event {
	__s64	timestamp;
	__u16	err_type;
	__u16	event_id;
	__u32	pad;
};
1230 | |
/**
 * struct hl_info_engine_err_event - information about an engine error
 * @timestamp: time-stamp of the error occurrence
 * @engine_id: id of the engine that reported the error.
 * @error_count: Amount of errors reported.
 * @pad: padding that rounds the structure size up to u64 granularity.
 */
struct hl_info_engine_err_event {
	__s64	timestamp;
	__u16	engine_id;
	__u16	error_count;
	__u32	pad;
};
1244 | |
/**
 * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information.
 * @page_order_bitmask: bitmap in which a set bit represents the order of a
 *                      supported page size. For example, 0x2100000 has bits 20
 *                      and 25 set, meaning that 1MB and 32MB pages are
 *                      supported.
 */
struct hl_info_dev_memalloc_page_sizes {
	__u64	page_order_bitmask;
};
1253 | |
/* Buffer sizes, in bytes, of the arrays in struct hl_info_sec_attest */
#define SEC_PCR_DATA_BUF_SZ	256
#define SEC_PCR_QUOTE_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
#define SEC_SIGNATURE_BUF_SZ	255	/* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ	2046	/* (2048 - 2) 2 bytes used for size */

/**
 * struct hl_info_sec_attest - attestation report of the boot
 * @nonce: number only used once. random number provided by host. this is also
 *         passed to the quote command as a qualifying data.
 * @pcr_quote_len: length of the attestation quote data (bytes)
 * @pub_data_len: length of the public data (bytes)
 * @certificate_len: length of the certificate (bytes)
 * @pcr_num_reg: number of PCR registers in the pcr_data array
 * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
 * @quote_sig_len: length of the attestation report signature (bytes)
 * @pcr_data: raw values of the PCR registers
 * @pcr_quote: attestation report data structure
 * @quote_sig: signature structure of the attestation report
 * @public_data: public key for the signed attestation
 *               (outPublic + name + qualifiedName)
 * @certificate: certificate for the attestation signing key
 * @pad0: trailing padding bytes.
 */
struct hl_info_sec_attest {
	__u32	nonce;
	__u16	pcr_quote_len;
	__u16	pub_data_len;
	__u16	certificate_len;
	__u8	pcr_num_reg;
	__u8	pcr_reg_len;
	__u8	quote_sig_len;
	__u8	pcr_data[SEC_PCR_DATA_BUF_SZ];
	__u8	pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
	__u8	quote_sig[SEC_SIGNATURE_BUF_SZ];
	__u8	public_data[SEC_PUB_DATA_BUF_SZ];
	__u8	certificate[SEC_CERTIFICATE_BUF_SZ];
	__u8	pad0[2];
};
1292 | |
/**
 * struct hl_page_fault_info - information about a page fault.
 * @timestamp: timestamp of the page fault.
 * @addr: the address whose access caused the page fault.
 * @engine_id: id of the engine which caused the page fault, supported only in
 *             gaudi3.
 * @pad: padding; brings the structure size to a multiple of 8 bytes.
 */
struct hl_page_fault_info {
	__s64	timestamp;
	__u64	addr;
	__u16	engine_id;
	__u8	pad[6];
};
1305 | |
/**
 * struct hl_user_mapping - information about a single user mapping.
 * @dev_va: device virtual address of the mapping.
 * @size: size of the virtual address mapping.
 */
struct hl_user_mapping {
	__u64	dev_va;
	__u64	size;
};
1315 | |
/*
 * Gaudi DCORE identifiers, numbered 0 to 3.
 * NOTE(review): WS/WN/EN/ES presumably name the physical position of each
 * dcore - confirm against the ASIC documentation.
 */
enum gaudi_dcores {
	HL_GAUDI_WS_DCORE = 0,
	HL_GAUDI_WN_DCORE = 1,
	HL_GAUDI_EN_DCORE = 2,
	HL_GAUDI_ES_DCORE = 3
};
1322 | |
/**
 * struct hl_info_args - Main structure to retrieve device related information.
 *
 * All the per-opcode arguments are 32 bits wide and share the same storage
 * (anonymous union).
 *
 * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation
 *                  mentioned in @op.
 * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it
 *               limits how many bytes the kernel can write. For hw_events array, the size should
 *               be hl_info_hw_ip_info.num_of_events * sizeof(__u32).
 * @op: Defines which type of information is to be retrieved. Refer to HL_INFO_* for details.
 * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to
 *            enum gaudi_dcores).
 * @ctx_id: Context ID of the user. Currently not in use.
 * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in
 *             100 ms resolution. Currently not in use.
 * @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
 * @eventfd: event file descriptor for event notifications.
 * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the
 *                           driver. It is possible for the user to allocate buffer larger than
 *                           needed, hence updating this variable so user will know the exact
 *                           amount of bytes copied by the kernel to the buffer.
 * @sec_attest_nonce: Nonce number used for attestation report.
 * @array_size: Number of array members copied to user buffer.
 *              Relevant for HL_INFO_USER_MAPPINGS info ioctl.
 * @fw_sub_opcode: generic requests sub opcodes.
 * @pad: Padding to 64 bit.
 */
struct hl_info_args {
	__u64	return_pointer;
	__u32	return_size;
	__u32	op;

	union {
		__u32	dcore_id;
		__u32	ctx_id;
		__u32	period_ms;
		__u32	pll_index;
		__u32	eventfd;
		__u32	user_buffer_actual_size;
		__u32	sec_attest_nonce;
		__u32	array_size;
		__u32	fw_sub_opcode;
	};

	__u32	pad;
};
1366 | |
/*
 * Command buffer (CB) opcodes:
 *
 * HL_CB_OP_CREATE  - create a new command buffer
 * HL_CB_OP_DESTROY - destroy a previously created command buffer
 * HL_CB_OP_INFO    - retrieve information about a command buffer
 */
#define HL_CB_OP_CREATE			0
#define HL_CB_OP_DESTROY		1
#define HL_CB_OP_INFO			2

/* Maximum CB size: 2MB minus 32 bytes for 2xMSG_PROT */
#define HL_MAX_CB_SIZE			(0x200000 - 32)

/*
 * Command buffer flags:
 *
 * HL_CB_FLAGS_MAP           - indicates whether the command buffer should be
 *                             mapped to the device's MMU
 * HL_CB_FLAGS_GET_DEVICE_VA - used with HL_CB_OP_INFO opcode to get the device
 *                             va address for kernel mapped CB
 */
#define HL_CB_FLAGS_MAP			0x1
#define HL_CB_FLAGS_GET_DEVICE_VA	0x2
1382 | |
/**
 * struct hl_cb_in - input arguments of a command buffer operation
 * @cb_handle: Handle of CB or 0 if we want to create one
 * @op: HL_CB_OP_*
 * @cb_size: Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
 *           will be allocated, regardless of this parameter's value, is
 *           PAGE_SIZE
 * @ctx_id: Context ID - Currently not in use
 * @flags: HL_CB_FLAGS_*
 */
struct hl_cb_in {
	__u64	cb_handle;
	__u32	op;
	__u32	cb_size;
	__u32	ctx_id;
	__u32	flags;
};

/**
 * struct hl_cb_out - output of a command buffer operation
 *
 * All members overlay the same 8 bytes.
 *
 * @cb_handle: Handle of CB
 * @usage_cnt: Information about CB - usage count of CB
 * @pad: fills the upper half of the 8 bytes next to @usage_cnt
 * @device_va: CB mapped address to device MMU
 */
struct hl_cb_out {
	union {
		__u64	cb_handle;

		union {
			struct {
				__u32	usage_cnt;
				__u32	pad;
			};

			__u64	device_va;
		};
	};
};

/* Command buffer ioctl argument: @in and @out share the same storage */
union hl_cb_args {
	struct hl_cb_in		in;
	struct hl_cb_out	out;
};
1423 | |
/*
 * HL_CS_CHUNK_FLAGS_ values
 *
 * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB:
 *      Indicates if the CB was allocated and mapped by userspace
 *      (relevant to Gaudi2 and later). User allocated CB is a command buffer,
 *      allocated by the user, via malloc (or similar). After allocating the
 *      CB, the user invokes the "memory ioctl" to map the user memory into a
 *      device virtual address. The user provides this address via the
 *      cb_handle field. The interface provides the ability to create
 *      large CBs, which aren't limited to "HL_MAX_CB_SIZE". Therefore, it
 *      increases the PCI-DMA queues throughput. This CB allocation method
 *      also reduces the use of the Linux DMA-able memory pool, which is
 *      limited and used by other Linux sub-systems.
 */
#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB		0x1
1439 | |
/**
 * struct hl_cs_chunk - command submission chunk
 *
 * This structure size must always be fixed to 64-bytes for backward
 * compatibility.
 *
 * @cb_handle: Goya/Gaudi:
 *             For external queue, this represents a Handle of CB on the Host.
 *             For internal queue in Goya, this represents an SRAM or
 *             a DRAM address of the internal CB. In Gaudi, this might also
 *             represent a mapped host address of the CB.
 *
 *             Gaudi2 onwards:
 *             For H/W queue, this represents either a Handle of CB on the
 *             Host, or an SRAM, a DRAM, or a mapped host address of the CB.
 *
 *             A mapped host address is in the device address space, after
 *             a host address was mapped by the device MMU.
 * @signal_seq_arr: Relevant only when HL_CS_FLAGS_WAIT or
 *                  HL_CS_FLAGS_COLLECTIVE_WAIT is set.
 *                  This holds address of array of u64 values that contain
 *                  signal CS sequence numbers. The wait described by
 *                  this job will listen on all those signals
 *                  (wait event per signal)
 * @encaps_signal_seq: Relevant only when HL_CS_FLAGS_WAIT or
 *                     HL_CS_FLAGS_COLLECTIVE_WAIT is set
 *                     along with HL_CS_FLAGS_ENCAP_SIGNALS.
 *                     This is the CS sequence which has the encapsulated
 *                     signals.
 * @queue_index: Index of queue to put the CB on
 * @cb_size: Size of command buffer with valid packets.
 *           Can be smaller than actual CB size
 * @num_signal_seq_arr: Relevant only when HL_CS_FLAGS_WAIT or
 *                      HL_CS_FLAGS_COLLECTIVE_WAIT is set.
 *                      Number of entries in signal_seq_arr
 * @encaps_signal_offset: Relevant only when HL_CS_FLAGS_WAIT or
 *                        HL_CS_FLAGS_COLLECTIVE_WAIT is set along
 *                        with HL_CS_FLAGS_ENCAP_SIGNALS.
 *                        This sets the signals range that the user wants to
 *                        wait for out of the whole reserved signals range.
 *                        e.g if the signals range is 20, and user don't want
 *                        to wait for signal 8, so he set this offset to 7, then
 *                        he call the API again with 9 and so on till 20.
 * @cs_chunk_flags: HL_CS_CHUNK_FLAGS_*
 * @collective_engine_id: Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set.
 *                        This holds the collective engine ID. The wait
 *                        described by this job will sync with this engine and
 *                        with all NICs before completion.
 * @pad: Align structure to 64 bytes
 */
struct hl_cs_chunk {
	union {
		__u64	cb_handle;
		__u64	signal_seq_arr;
		__u64	encaps_signal_seq;
	};

	__u32	queue_index;

	union {
		__u32	cb_size;
		__u32	num_signal_seq_arr;
		__u32	encaps_signal_offset;
	};

	__u32	cs_chunk_flags;
	__u32	collective_engine_id;
	__u32	pad[10];
};
1520 | |
/*
 * Command submission flags, one bit each.
 * SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive.
 * Bit 0x10 is not assigned in this list.
 */
#define HL_CS_FLAGS_FORCE_RESTORE		0x1
#define HL_CS_FLAGS_SIGNAL			0x2
#define HL_CS_FLAGS_WAIT			0x4
#define HL_CS_FLAGS_COLLECTIVE_WAIT		0x8

#define HL_CS_FLAGS_TIMESTAMP			0x20
#define HL_CS_FLAGS_STAGED_SUBMISSION		0x40
#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST	0x80
#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST	0x100
#define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT	0x400
1533 | |
/*
 * The encapsulated signals CS is merged into the existing CS ioctls.
 * In order to use this feature, follow the procedure below:
 * 1. Reserve signals: set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY.
 *    The output of this API will be the SOB offset from CFG_BASE.
 *    This address will be used to patch CB cmds to do the signaling for this
 *    SOB by incrementing its value.
 *    To revert the reservation use the HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
 *    CS type. Note that this might fail if out-of-sync happened to the SOB
 *    value, in case another signaling request to the same SOB occurred
 *    between the reserve-unreserve calls.
 * 2. Use the staged CS to do the encapsulated signaling jobs:
 *    use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
 *    along with the HL_CS_FLAGS_ENCAP_SIGNALS flag, and set the
 *    encaps_signal_offset field. This offset allows the app to wait on part
 *    of the reserved signals.
 * 3. Use WAIT/COLLECTIVE WAIT CS along with the HL_CS_FLAGS_ENCAP_SIGNALS flag
 *    to wait for the encapsulated signals.
 */
#define HL_CS_FLAGS_ENCAP_SIGNALS		0x800
#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY	0x1000
#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY	0x2000
1555 | |
/*
 * Engine cores CS - merged into the existing CS ioctls.
 * Controls the mode of the engine cores.
 */
#define HL_CS_FLAGS_ENGINE_CORE_COMMAND		0x4000

/*
 * Flush HBW PCI writes - merged into the existing CS ioctls.
 * Flushes all HBW PCI writes.
 * The operation is blocking, therefore the user shall not use the returned
 * sequence number (which will be invalid anyway).
 */
#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES	0x8000

/*
 * Engines CS - merged into the existing CS ioctls.
 * Controls the modes of the engines.
 */
#define HL_CS_FLAGS_ENGINES_COMMAND		0x10000
1575 | |
/* Value of the CS status field that denotes success */
#define HL_CS_STATUS_SUCCESS	0

/* Upper bound on the number of chunks (jobs) in each CS chunk array */
#define HL_MAX_JOBS_PER_CS	512
1579 | |
/*
 * enum hl_engine_command - commands that can be sent towards engines
 *
 * @HL_ENGINE_CORE_HALT: halt the engine core
 * @HL_ENGINE_CORE_RUN: run the engine core
 * @HL_ENGINE_STALL: stall the user engine/s
 * @HL_ENGINE_RESUME: resume the user engine/s
 * @HL_ENGINE_COMMAND_MAX: one past the last command value
 */
enum hl_engine_command {
	HL_ENGINE_CORE_HALT	= 1,
	HL_ENGINE_CORE_RUN	= 2,
	HL_ENGINE_STALL		= 3,
	HL_ENGINE_RESUME	= 4,
	HL_ENGINE_COMMAND_MAX
};
1595 | |
/*
 * Input arguments of the command submission (CS) ioctl.
 * The HL_CS_FLAGS_* bits in cs_flags select which member of each anonymous
 * union is meaningful.
 */
struct hl_cs_in {
	union {
		struct {
			/* address of the hl_cs_chunk array of the restore phase */
			__u64 chunks_restore;

			/* address of the hl_cs_chunk array of the execution phase */
			__u64 chunks_execute;
		};

		/* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */
		struct {
			/* address of a uint32 array that lists the engine cores */
			__u64 engine_cores;

			/* number of entries in the engine_cores array */
			__u32 num_engine_cores;

			/* the core command to send towards the engine cores */
			__u32 core_command;
		};

		/* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */
		struct {
			/* address of a uint32 array that lists the engines */
			__u64 engines;

			/* number of entries in the engines array */
			__u32 num_engines;

			/* the engine command to send towards the engines */
			__u32 engine_command;
		};
	};

	union {
		/*
		 * Sequence number of a staged submission CS.
		 * Valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
		 * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
		 */
		__u64 seq;

		/*
		 * Handle id of the encapsulated signals.
		 * Valid in two flows:
		 * 1. CS with encapsulated signals:
		 *    when HL_CS_FLAGS_STAGED_SUBMISSION,
		 *    HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
		 *    and HL_CS_FLAGS_ENCAP_SIGNALS are all set.
		 * 2. Unreserve signals:
		 *    when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
		 */
		__u32 encaps_sig_handle_id;

		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
		struct {
			/* Number of encapsulated signals */
			__u32 encaps_signals_count;

			/* Queue index (stream) of the encapsulated signals */
			__u32 encaps_signals_q_idx;
		};
	};

	/*
	 * Number of chunks in the restore phase array.
	 * The maximum is HL_MAX_JOBS_PER_CS.
	 */
	__u32 num_chunks_restore;

	/*
	 * Number of chunks in the execution array.
	 * The maximum is HL_MAX_JOBS_PER_CS.
	 */
	__u32 num_chunks_execute;

	/* Timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT is set */
	__u32 timeout;

	/* HL_CS_FLAGS_* */
	__u32 cs_flags;

	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u8 pad[4];
};
1684 | |
/* Output of the command submission (CS) ioctl */
struct hl_cs_out {
	union {
		/*
		 * Sequence number of the CS, to be passed to the wait ioctl.
		 * Every value is valid except 0 and ULLONG_MAX.
		 */
		__u64 seq;

		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
		struct {
			/* Handle id of the reserved signals */
			__u32 handle_id;

			/* The signals count */
			__u32 count;
		};
	};

	/* HL_CS_STATUS */
	__u32 status;

	/*
	 * Offset of the SOB base address.
	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or
	 * HL_CS_FLAGS_SIGNAL is set.
	 */
	__u32 sob_base_addr_offset;

	/*
	 * Number of signals that completed in the SOB before the current
	 * signal submission.
	 * Valid only when
	 * (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION)
	 * or HL_CS_FLAGS_SIGNAL is set.
	 */
	__u16 sob_count_before_submission;
	__u16 pad[3];
};
1720 | |
1721 | union hl_cs_args { |
1722 | struct hl_cs_in in; |
1723 | struct hl_cs_out out; |
1724 | }; |
1725 | |
/* HL_WAIT_CS_FLAGS_* - values for the flags field of struct hl_wait_cs_in */
#define HL_WAIT_CS_FLAGS_INTERRUPT		0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK		0xFFF00000
#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT	0xFFF00000
#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT	0xFFE00000
#define HL_WAIT_CS_FLAGS_MULTI_CS		0x4
#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ	0x10
#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT	0x20

/* Maximum length of the multi CS list */
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN		32
1735 | |
/* Input arguments of the "Wait for CS" ioctl */
struct hl_wait_cs_in {
	union {
		struct {
			/*
			 * For wait_cs: the CS sequence number.
			 * For wait for multi CS: a user pointer to an array
			 * of CS sequence numbers.
			 */
			__u64 seq;

			/*
			 * Absolute timeout, in microseconds, to wait for the
			 * command submission
			 */
			__u64 timeout_us;
		};

		struct {
			union {
				/*
				 * User address for completion comparison.
				 * Upon interrupt, the driver compares the value
				 * pointed to by this address with the supplied
				 * target value. To skip the comparison, set the
				 * address to all 1s.
				 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT
				 * is set.
				 */
				__u64 addr;

				/*
				 * Handle to a kernel mapped cb which contains
				 * cq counters.
				 * Relevant only when
				 * HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set.
				 */
				__u64 cq_counters_handle;
			};

			/* Target value for completion comparison */
			__u64 target;
		};
	};

	/* Context ID - Currently not in use */
	__u32 ctx_id;

	/*
	 * HL_WAIT_CS_FLAGS_*
	 * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include the
	 * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK.
	 *
	 * To wait for any CQ interrupt, set the interrupt value to
	 * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT.
	 *
	 * To wait for any decoder interrupt, set the interrupt value to
	 * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT.
	 */
	__u32 flags;

	union {
		struct {
			/* Multi CS API info - valid entries in multi-CS array */
			__u8 seq_arr_len;
			__u8 pad[7];
		};

		/*
		 * Absolute timeout, in microseconds, to wait for an interrupt.
		 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set.
		 */
		__u64 interrupt_timeout_us;
	};

	/*
	 * Offset of the cq counter inside the counters cb pointed to by
	 * cq_counters_handle. Upon interrupt, the driver compares the value
	 * pointed to by (cq_counters_handle + cq_counters_offset) with the
	 * supplied target value.
	 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set.
	 */
	__u64 cq_counters_offset;

	/*
	 * Handle of the timestamps buffer.
	 * Relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set.
	 */
	__u64 timestamp_handle;

	/*
	 * Offset inside the timestamp buffer pointed to by timestamp_handle.
	 * Upon interrupt, if the cq reached the target value, the driver
	 * writes the timestamp to this offset.
	 * Relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set.
	 */
	__u64 timestamp_offset;
};
1825 | |
/* HL_WAIT_CS_STATUS_* - values of the status field of struct hl_wait_cs_out */
#define HL_WAIT_CS_STATUS_COMPLETED	0
#define HL_WAIT_CS_STATUS_BUSY		1
#define HL_WAIT_CS_STATUS_TIMEDOUT	2
#define HL_WAIT_CS_STATUS_ABORTED	3

/* HL_WAIT_CS_STATUS_FLAG* - bits of the flags field of struct hl_wait_cs_out */
#define HL_WAIT_CS_STATUS_FLAG_GONE		0x1
#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD	0x2
1833 | |
/* Output of the "Wait for CS" ioctl */
struct hl_wait_cs_out {
	/* HL_WAIT_CS_STATUS_* */
	__u32 status;

	/* HL_WAIT_CS_STATUS_FLAG* */
	__u32 flags;

	/*
	 * Valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set.
	 * For wait_cs: timestamp of the CS completion.
	 * For wait_multi_cs: timestamp of the FIRST CS completion.
	 */
	__s64 timestamp_nsec;

	/* Bitmap of the completed CSs (multi CS) */
	__u32 cs_completion_map;
	__u32 pad;
};
1849 | |
1850 | union hl_wait_cs_args { |
1851 | struct hl_wait_cs_in in; |
1852 | struct hl_wait_cs_out out; |
1853 | }; |
1854 | |
/* Opcode: allocate device memory */
#define HL_MEM_OP_ALLOC			0

/* Opcode: free device memory that was previously allocated */
#define HL_MEM_OP_FREE			1

/* Opcode: map host and device memory */
#define HL_MEM_OP_MAP			2

/* Opcode: unmap host and device memory that was previously mapped */
#define HL_MEM_OP_UNMAP			3

/* Opcode: map a hw block */
#define HL_MEM_OP_MAP_BLOCK		4

/*
 * Opcode: create a DMA-BUF object for an existing device memory allocation
 * and export an FD of that DMA-BUF back to the caller
 */
#define HL_MEM_OP_EXPORT_DMABUF_FD	5

/*
 * Opcode: create a timestamps pool for user interrupts registration support.
 * The kernel driver allocates the memory: a timestamp buffer, for which the
 * user gets a handle to be used for mmap, and another internal buffer which
 * the driver uses for registration management.
 * The memory is freed when the user closes the file descriptor (ctx close).
 */
#define HL_MEM_OP_TS_ALLOC		6
1882 | |
/* HL_MEM_* - memory flags */
#define HL_MEM_CONTIGUOUS	0x1
#define HL_MEM_SHARED		0x2
#define HL_MEM_USERPTR		0x4
#define HL_MEM_FORCE_HINT	0x8
#define HL_MEM_PREFETCH		0x40
1889 | |
/**
 * struct hl_mem_in - input arguments of the memory IOCTL
 * @union arg: union of structures, the one to use depends on the requested
 *             operation.
 * @op: the requested memory operation (one of the HL_MEM_OP_* definitions).
 * @flags: flags for the memory operation (one of the HL_MEM_* definitions).
 *         For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
 *         DMA-BUF file/FD flags.
 * @ctx_id: context ID - currently not in use.
 * @num_of_elements: number of timestamp elements, used only with the
 *                   HL_MEM_OP_TS_ALLOC opcode.
 */
struct hl_mem_in {
	union {
		/**
		 * device memory allocation (used with the HL_MEM_OP_ALLOC op)
		 * @mem_size: size of the memory to allocate
		 * @page_size: page size to use for the allocation. A value of
		 *             0 selects the default page size.
		 */
		struct {
			__u64 mem_size;
			__u64 page_size;
		} alloc;

		/**
		 * freeing device memory (used with the HL_MEM_OP_FREE op)
		 * @handle: handle returned from HL_MEM_OP_ALLOC
		 */
		struct {
			__u64 handle;
		} free;

		/**
		 * mapping device memory (used with the HL_MEM_OP_MAP op)
		 * @hint_addr: requested virtual address of the mapped memory.
		 *             The driver tries to map the requested region to
		 *             this hint address, as long as the address is
		 *             valid and not already mapped. The user should
		 *             check the address returned by the IOCTL to make
		 *             sure the hint address was used.
		 *             Passing 0 lets the driver choose the address
		 *             itself.
		 * @handle: handle returned from HL_MEM_OP_ALLOC.
		 */
		struct {
			__u64 hint_addr;
			__u64 handle;
		} map_device;

		/**
		 * mapping host memory (used with the HL_MEM_OP_MAP op)
		 * @host_virt_addr: address of the allocated host memory.
		 * @hint_addr: requested virtual address of the mapped memory.
		 *             The driver tries to map the requested region to
		 *             this hint address, as long as the address is
		 *             valid and not already mapped. The user should
		 *             check the address returned by the IOCTL to make
		 *             sure the hint address was used.
		 *             Passing 0 lets the driver choose the address
		 *             itself.
		 * @mem_size: size of the allocated host memory.
		 */
		struct {
			__u64 host_virt_addr;
			__u64 hint_addr;
			__u64 mem_size;
		} map_host;

		/**
		 * mapping a hw block (used with the HL_MEM_OP_MAP_BLOCK op)
		 * @block_addr: address of the HW block to map. A handle and a
		 *              size are returned to the user, to be used for
		 *              mmap-ing the relevant block. Only addresses
		 *              from the configuration space are allowed.
		 */
		struct {
			__u64 block_addr;
		} map_block;

		/**
		 * unmapping host memory (used with the HL_MEM_OP_UNMAP op)
		 * @device_virt_addr: virtual address returned from
		 *                    HL_MEM_OP_MAP
		 */
		struct {
			__u64 device_virt_addr;
		} unmap;

		/**
		 * exporting a DMABUF object (used with the
		 * HL_MEM_OP_EXPORT_DMABUF_FD op)
		 * @addr: for Gaudi1, the driver expects a physical address
		 *        inside the device's DRAM, because Gaudi1 has no MMU
		 *        that covers the device's DRAM.
		 *        For all other ASICs, the driver expects a device
		 *        virtual address that represents the start address of
		 *        a mapped DRAM memory area inside the device. The
		 *        address must be the same as the one received from
		 *        the driver during a previous HL_MEM_OP_MAP operation.
		 * @mem_size: size of the memory to export.
		 * @offset: for Gaudi1, this value must be 0. For all other
		 *          ASICs, the driver expects an offset inside the
		 *          memory area described by addr. The offset
		 *          represents the start address of the region that
		 *          the exported dma-buf object describes.
		 */
		struct {
			__u64 addr;
			__u64 mem_size;
			__u64 offset;
		} export_dmabuf_fd;
	};

	__u32 op;
	__u32 flags;
	__u32 ctx_id;
	__u32 num_of_elements;
};
1999 | |
/* Output of the memory IOCTL; the valid union member depends on the opcode */
struct hl_mem_out {
	union {
		/*
		 * HL_MEM_OP_MAP: the virtual address that was assigned in the
		 * device VA space.
		 * A value of 0 means the requested operation failed.
		 */
		__u64 device_virt_addr;

		/*
		 * HL_MEM_OP_ALLOC: the handle that was assigned to the
		 * allocated memory
		 */
		__u64 handle;

		struct {
			/*
			 * HL_MEM_OP_MAP_BLOCK: the handle that was assigned
			 * to the mapped block
			 */
			__u64 block_handle;

			/* HL_MEM_OP_MAP_BLOCK: the size of the mapped block */
			__u32 block_size;

			__u32 pad;
		};

		/*
		 * HL_MEM_OP_EXPORT_DMABUF_FD: represents the DMA-BUF object
		 * that was created to describe a memory allocation on the
		 * device's memory space. The FD should be passed to the
		 * importer driver.
		 */
		__s32 fd;
	};
};
2039 | |
2040 | union hl_mem_args { |
2041 | struct hl_mem_in in; |
2042 | struct hl_mem_out out; |
2043 | }; |
2044 | |
/* Maximum number of auxiliary values in a debug parameters structure */
#define HL_DEBUG_MAX_AUX_VALUES		10
2046 | |
/* Debug parameters of the ETR component */
struct hl_debug_params_etr {
	/* Memory address at which to allocate the buffer */
	__u64 buffer_address;

	/* Size of the buffer to allocate */
	__u64 buffer_size;

	/* Operation mode of the sink: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};
2058 | |
/* Debug parameters of the ETF component */
struct hl_debug_params_etf {
	/* Memory address at which to allocate the buffer */
	__u64 buffer_address;

	/* Size of the buffer to allocate */
	__u64 buffer_size;

	/* Operation mode of the sink: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};
2070 | |
/* Debug parameters of the STM component */
struct hl_debug_params_stm {
	/* Bit mask for HW event */
	__u64 he_mask;
	/* Bit mask for Stimulus Port */
	__u64 sp_mask;

	/* ID of the trace source */
	__u32 id;

	/* Frequency for the timestamp register */
	__u32 frequency;
};
2082 | |
/* Debug parameters of the BMON component */
struct hl_debug_params_bmon {
	/* First two address ranges that the user can request to filter */
	__u64 start_addr0;
	__u64 addr_mask0;

	__u64 start_addr1;
	__u64 addr_mask1;

	/* Configuration of the capture window */
	__u32 bw_win;
	__u32 win_capture;

	/* ID of the trace source */
	__u32 id;

	/* Control register */
	__u32 control;

	/* Two additional address ranges that the user can request to filter */
	__u64 start_addr2;
	__u64 end_addr2;

	__u64 start_addr3;
	__u64 end_addr3;
};
2108 | |
2109 | struct hl_debug_params_spmu { |
2110 | /* Event types selection */ |
2111 | __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; |
2112 | |
2113 | /* Number of event types selection */ |
2114 | __u32 event_types_num; |
2115 | |
2116 | /* TRC configuration register values */ |
2117 | __u32 pmtrc_val; |
2118 | __u32 trc_ctrl_host_val; |
2119 | __u32 trc_en_host_val; |
2120 | }; |
2121 | |
/* Opcode: ETR component */
#define HL_DEBUG_OP_ETR		0
/* Opcode: ETF component */
#define HL_DEBUG_OP_ETF		1
/* Opcode: STM component */
#define HL_DEBUG_OP_STM		2
/* Opcode: FUNNEL component */
#define HL_DEBUG_OP_FUNNEL	3
/* Opcode: BMON component */
#define HL_DEBUG_OP_BMON	4
/* Opcode: SPMU component */
#define HL_DEBUG_OP_SPMU	5
/* Opcode: timestamp (deprecated) */
#define HL_DEBUG_OP_TIMESTAMP	6
/*
 * Opcode: set the device into or out of debug mode. The enable variable
 * should be 1 to enable debug mode and 0 to disable it.
 */
#define HL_DEBUG_OP_SET_MODE	7
2140 | |
/* Argument of the debug IOCTL */
struct hl_debug_args {
	/*
	 * Pointer to the user input structure.
	 * Relevant to specific opcodes only.
	 */
	__u64 input_ptr;

	/* Pointer to the user output structure */
	__u64 output_ptr;

	/* Size of the user input structure */
	__u32 input_size;

	/* Size of the user output structure */
	__u32 output_size;

	/* HL_DEBUG_OP_* */
	__u32 op;

	/*
	 * Index of the register in the component, taken from the
	 * debug_regs_index enum in the various ASIC header files
	 */
	__u32 reg_idx;

	/* Enable/disable */
	__u32 enable;

	/* Context ID - Currently not in use */
	__u32 ctx_id;
};
2165 | |
/* IOCTL numbers, used as offsets from DRM_COMMAND_BASE */
#define HL_IOCTL_INFO		0x00
#define HL_IOCTL_CB		0x01
#define HL_IOCTL_CS		0x02
#define HL_IOCTL_WAIT_CS	0x03
#define HL_IOCTL_MEMORY		0x04
#define HL_IOCTL_DEBUG		0x05
2172 | |
/*
 * Various information operations such as:
 * - H/W IP information
 * - Current dram usage
 *
 * The user calls this IOCTL with an opcode that describes the required
 * information, and supplies a pointer to a user-allocated memory chunk
 * which the driver fills with the requested information.
 *
 * The user also supplies the maximum size to copy into the user's memory.
 * This prevents data corruption when the definitions of the structures
 * differ between kernel and userspace, e.g. in case of an old userspace and
 * a new kernel driver.
 */
#define DRM_IOCTL_HL_INFO	\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_INFO, struct hl_info_args)
2188 | |
/*
 * Command Buffer
 * - Request a Command Buffer
 * - Destroy a Command Buffer
 *
 * Command buffers are memory blocks that reside in DMA-able address space
 * and are physically contiguous, so the device can access them directly.
 * They are allocated using the coherent DMA API.
 *
 * When a new CB is created, the IOCTL returns a handle to it. The user-space
 * process needs to use that handle to mmap the buffer in order to access it.
 *
 * In some instances, the device must access the command buffer through the
 * device's MMU, and thus its memory should be mapped. In these cases, the
 * user can indicate to the driver that such a mapping is required.
 * The resulting device virtual address is used internally by the driver and
 * is not returned to the user.
 *
 */
#define DRM_IOCTL_HL_CB		\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CB, union hl_cb_args)
2209 | |
/*
 * Command Submission
 *
 * To submit work to the device, the user needs to call this IOCTL with a set
 * of JOBS. That set of JOBS constitutes a CS object.
 * Each JOB is enqueued on a specific queue, according to the user's input.
 * There can be more than one JOB per queue.
 *
 * The CS IOCTL receives two sets of JOBS. One set is for the "restore" phase
 * and the second set is for the "execution" phase.
 * The JOBS of the "restore" phase are enqueued only after a context-switch
 * (or if it's the first CS for this context). The user can also order the
 * driver to run the "restore" phase explicitly.
 *
 * Goya/Gaudi:
 * There are two types of queues - external and internal. External queues
 * are DMA queues which transfer data from/to the Host. All other queues are
 * internal. The driver gets completion notifications from the device only
 * for JOBS which are enqueued in the external queues.
 *
 * Gaudi2 onwards:
 * There is a single type of queue for all types of engines, either DMA
 * engines for transfers from/to the host or inside the device, or compute
 * engines. The driver gets completion notifications from the device for all
 * queues.
 *
 * For jobs on external queues, the user needs to create command buffers
 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 * internal queues, the user needs to prepare a "command buffer" with packets
 * on either the device SRAM/DRAM or the host, and give the device address of
 * that buffer to the CS ioctl.
 * For jobs on H/W queues both options of command buffers are valid.
 *
 * This IOCTL is asynchronous with regard to the actual execution of the CS:
 * it returns immediately after ALL the JOBS were enqueued on their relevant
 * queues. Therefore, the user mustn't assume the CS has been completed or
 * has even started to execute.
 *
 * Upon successful enqueue, the IOCTL returns a sequence number which the
 * user can use with the "Wait for CS" IOCTL to check whether the handle's CS
 * non-internal JOBS have been completed. Note that if the CS has internal
 * JOBS which can execute AFTER the external JOBS have finished, the driver
 * might report that the CS has finished executing BEFORE the internal JOBS
 * have actually finished executing.
 *
 * Even though the sequence number increments per CS, the user can NOT
 * automatically assume that if the CS with sequence number N finished, then
 * the CS with sequence number N-1 also finished. The user can make this
 * assumption if and only if CS N and CS N-1 are exactly the same (same CBs
 * for the same queues).
 */
#define DRM_IOCTL_HL_CS		\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CS, union hl_cs_args)
2261 | |
/*
 * Wait for Command Submission
 *
 * The user can call this IOCTL with a handle it received from the CS IOCTL
 * in order to wait until the handle's CS has finished executing. The user
 * waits inside the kernel until the CS has finished or until the
 * user-requested timeout has expired.
 *
 * If the timeout value is 0, the driver doesn't sleep at all. It checks the
 * status of the CS and returns immediately.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *
 * EINTR     - Kernel waiting has been interrupted, e.g. due to an OS signal
 *             that the user process received
 * ETIMEDOUT - The CS has caused a timeout on the device
 * EIO       - The CS was aborted (usually because the device was reset)
 * ENODEV    - The device wants to do hard-reset (so the user needs to close
 *             the FD)
 *
 * The driver also returns a custom define in case the IOCTL call returned 0.
 * The define can be one of the following:
 *
 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 *                                 (ETIMEDOUT)
 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 *                                 device was reset (EIO)
 */
#define DRM_IOCTL_HL_WAIT_CS	\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_WAIT_CS, union hl_wait_cs_args)
2293 | |
/*
 * Memory
 * - Map host memory to device MMU
 * - Unmap host memory from device MMU
 *
 * This IOCTL allows the user to map host memory to the device MMU.
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the limit allowed by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * The user has the option to specify the requested virtual address.
 *
 */
#define DRM_IOCTL_HL_MEMORY	\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_MEMORY, union hl_mem_args)
2310 | |
/*
 * Debug
 * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
 *
 * This IOCTL allows the user to get debug traces from the chip.
 *
 * Before the user can send configuration requests for the various
 * debug/profile engines, it needs to set the device into debug mode.
 * This is because the debug/profile infrastructure is a shared component in
 * the device and we can't allow multiple users to access it at the same time.
 *
 * Once a user sets the device into debug mode, the driver won't allow other
 * users to "work" with the device, i.e. open a FD. If there are multiple
 * users opened on the device, the driver won't allow any user to debug the
 * device.
 *
 * For each configuration request, the user needs to provide the register
 * index and essential data such as buffer address and size.
 *
 * Once the user has finished using the debug/profile engines, he should
 * set the device into non-debug mode, i.e. disable debug mode.
 *
 * The driver can decide to "kick out" the user if he abuses this interface.
 *
 */
#define DRM_IOCTL_HL_DEBUG	\
	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_DEBUG, struct hl_debug_args)
2336 | |
/*
 * Range of the driver's DRM command numbers: HL_COMMAND_START is the first
 * command, HL_COMMAND_END is one past the last command.
 */
#define HL_COMMAND_START	(DRM_COMMAND_BASE + HL_IOCTL_INFO)
#define HL_COMMAND_END		(DRM_COMMAND_BASE + HL_IOCTL_DEBUG + 1)
2339 | |
2340 | #endif /* HABANALABS_H_ */ |
2341 | |