1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/iommu.h>
21#include <linux/seq_file.h>
22
23/*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89#define GAUDI_MAX_STRING_LEN 20
90
91#define GAUDI_CB_POOL_CB_CNT 512
92#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
103
104#define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108#define MONITOR_SOB_STRING_SIZE 256
109
/*
 * Queue IDs of the eight PCI DMA streams (DMA channels 0 and 1) that make up
 * the stream-master set.
 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};
120
/*
 * Maps each logical DMA role (two PCI DMA channels, six HBM DMA channels)
 * to the physical DMA engine ID that implements it.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
131
/*
 * Completion queue index -> queue ID of the external (PCI DMA) queue that
 * reports completions on it. One CQ per external queue stream.
 */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
142
/* Size in bytes of each supported QMAN packet, indexed by packet ID. */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
159
/*
 * validate_packet_id - check that a packet ID is one of the packet types
 * the driver knows how to handle (i.e. has an entry in gaudi_packet_sizes).
 * Returns true for a known packet ID, false otherwise.
 */
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}
182
/* Human-readable TPC interrupt cause strings, indexed by cause number. */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
206
/* Human-readable QMAN error cause strings, indexed by cause number. */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
226
/* Human-readable QMAN arbiter error cause strings, indexed by cause number. */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
233
/*
 * Queue type per Gaudi queue ID: the eight PCI DMA streams (DMA 0/1) are
 * external queues, the CPU PQ is a driver-only CPU queue, and all remaining
 * DMA/MME/TPC/NIC streams are internal (on-device) queues.
 */
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
349
/* Symbolic names for well-known sync object (SOB) IDs. */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
379
/* Symbolic names for well-known monitor IDs. */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
393
/*
 * Per-ASIC state dump properties, indexed by the SP_* enum: register
 * addresses/offsets of sync objects, monitors, fences and queue blocks,
 * plus engine/stream counts.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
429
/*
 * Maps each queue ID to the engine that serves it. The CPU PQ has no engine
 * and maps to GAUDI_ENGINE_ID_SIZE (out-of-range sentinel).
 * NOTE(review): the MME_1_* queues map to engine MME_2, not MME_1 - looks
 * intentional (slave/master MME pairing) but confirm against the h/w spec.
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
461
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 * NULL-terminated so callers can iterate without a count.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};
472
/*
 * struct ecc_info_extract_params - parameters for extracting ECC error info.
 * @block_address: base address of the h/w block to query.
 * @num_memories: number of memories in the block.
 * @derr: true for double-bit (uncorrectable) errors, false for single-bit.
 */
struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};
478
/* Forward declarations for static functions defined later in this file */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
497static inline enum hl_collective_mode
498get_collective_mode(struct hl_device *hdev, u32 queue_id)
499{
500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 return HL_COLLECTIVE_MASTER;
502
503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 return HL_COLLECTIVE_SLAVE;
514
515 return HL_COLLECTIVE_NOT_SUPPORTED;
516}
517
518static inline void set_default_power_values(struct hl_device *hdev)
519{
520 struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522 if (hdev->card_type == cpucp_card_type_pmc) {
523 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525 if (prop->fw_security_enabled)
526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 else
528 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 } else {
530 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 }
533}
534
535static int gaudi_set_fixed_properties(struct hl_device *hdev)
536{
537 struct asic_fixed_properties *prop = &hdev->asic_prop;
538 u32 num_sync_stream_queues = 0;
539 int i;
540
541 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 prop->hw_queues_props = kcalloc(prop->max_queues,
543 sizeof(struct hw_queue_properties),
544 GFP_KERNEL);
545
546 if (!prop->hw_queues_props)
547 return -ENOMEM;
548
549 for (i = 0 ; i < prop->max_queues ; i++) {
550 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 prop->hw_queues_props[i].driver_only = 0;
553 prop->hw_queues_props[i].supports_sync_stream = 1;
554 prop->hw_queues_props[i].cb_alloc_flags =
555 CB_ALLOC_KERNEL;
556 num_sync_stream_queues++;
557 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 prop->hw_queues_props[i].driver_only = 1;
560 prop->hw_queues_props[i].supports_sync_stream = 0;
561 prop->hw_queues_props[i].cb_alloc_flags =
562 CB_ALLOC_KERNEL;
563 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 prop->hw_queues_props[i].driver_only = 0;
566 prop->hw_queues_props[i].supports_sync_stream = 0;
567 prop->hw_queues_props[i].cb_alloc_flags =
568 CB_ALLOC_USER;
569
570 }
571 prop->hw_queues_props[i].collective_mode =
572 get_collective_mode(hdev, queue_id: i);
573 }
574
575 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 prop->cfg_base_address = CFG_BASE;
577 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 prop->host_base_address = HOST_PHYS_BASE;
579 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 prop->collective_first_sob = 0;
583 prop->collective_first_mon = 0;
584
585 /* 2 SOBs per internal queue stream are reserved for collective */
586 prop->sync_stream_first_sob =
587 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 * QMAN_STREAMS * HL_RSVD_SOBS;
589
590 /* 1 monitor per internal queue stream are reserved for collective
591 * 2 monitors per external queue stream are reserved for collective
592 */
593 prop->sync_stream_first_mon =
594 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 (NUMBER_OF_EXT_HW_QUEUES * 2);
596
597 prop->dram_base_address = DRAM_PHYS_BASE;
598 prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601
602 prop->sram_base_address = SRAM_BASE_ADDR;
603 prop->sram_size = SRAM_SIZE;
604 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 prop->sram_user_base_address =
606 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607
608 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610
611 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 if (hdev->pldm)
613 prop->mmu_pgt_size = 0x800000; /* 8MB */
614 else
615 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 prop->mmu_pte_size = HL_PTE_SIZE;
617 prop->dram_page_size = PAGE_SIZE_2MB;
618 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619 prop->dram_supports_virtual_memory = false;
620
621 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631 prop->pmmu.start_addr = VA_HOST_SPACE_START;
632 prop->pmmu.end_addr =
633 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634 prop->pmmu.page_size = PAGE_SIZE_4KB;
635 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636 prop->pmmu.last_mask = LAST_MASK;
637 /* TODO: will be duplicated until implementing per-MMU props */
638 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639 prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640
641 /* PMMU and HPMMU are the same except of page size */
642 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644
645 /* shifts and masks are the same in PMMU and DMMU */
646 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648 prop->dmmu.end_addr = VA_HOST_SPACE_END;
649 prop->dmmu.page_size = PAGE_SIZE_2MB;
650 prop->dmmu.pgt_size = prop->mmu_pgt_size;
651
652 prop->cfg_size = CFG_SIZE;
653 prop->max_asid = MAX_ASID;
654 prop->num_of_events = GAUDI_EVENT_SIZE;
655 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657
658 set_default_power_values(hdev);
659
660 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662
663 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665
666 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667 CARD_NAME_MAX_LEN);
668
669 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670
671 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672 prop->sync_stream_first_sob +
673 (num_sync_stream_queues * HL_RSVD_SOBS);
674 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675 prop->sync_stream_first_mon +
676 (num_sync_stream_queues * HL_RSVD_MONS);
677
678 prop->first_available_user_interrupt = USHRT_MAX;
679 prop->tpc_interrupt_id = USHRT_MAX;
680
681 /* single msi */
682 prop->eq_interrupt_id = 0;
683
684 for (i = 0 ; i < HL_MAX_DCORES ; i++)
685 prop->first_available_cq[i] = USHRT_MAX;
686
687 prop->fw_cpu_boot_dev_sts0_valid = false;
688 prop->fw_cpu_boot_dev_sts1_valid = false;
689 prop->hard_reset_done_by_fw = false;
690 prop->gic_interrupts_enable = true;
691
692 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693
694 prop->clk_pll_index = HL_GAUDI_MME_PLL;
695 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696
697 prop->use_get_power_for_reset_history = true;
698
699 prop->configurable_stop_on_err = true;
700
701 prop->set_max_power_on_device_init = true;
702
703 prop->dma_mask = 48;
704
705 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706
707 return 0;
708}
709
710static int gaudi_pci_bars_map(struct hl_device *hdev)
711{
712 static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 bool is_wc[3] = {false, false, true};
714 int rc;
715
716 rc = hl_pci_bars_map(hdev, name, is_wc);
717 if (rc)
718 return rc;
719
720 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 (CFG_BASE - SPI_FLASH_BASE_ADDR);
722
723 return 0;
724}
725
726static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727{
728 struct gaudi_device *gaudi = hdev->asic_specific;
729 struct hl_inbound_pci_region pci_region;
730 u64 old_addr = addr;
731 int rc;
732
733 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 return old_addr;
735
736 if (hdev->asic_prop.iatu_done_by_fw)
737 return U64_MAX;
738
739 /* Inbound Region 2 - Bar 4 - Point to HBM */
740 pci_region.mode = PCI_BAR_MATCH_MODE;
741 pci_region.bar = HBM_BAR_ID;
742 pci_region.addr = addr;
743 rc = hl_pci_set_inbound_region(hdev, region: 2, pci_region: &pci_region);
744 if (rc)
745 return U64_MAX;
746
747 if (gaudi) {
748 old_addr = gaudi->hbm_bar_cur_addr;
749 gaudi->hbm_bar_cur_addr = addr;
750 }
751
752 return old_addr;
753}
754
755static int gaudi_init_iatu(struct hl_device *hdev)
756{
757 struct hl_inbound_pci_region inbound_region;
758 struct hl_outbound_pci_region outbound_region;
759 int rc;
760
761 if (hdev->asic_prop.iatu_done_by_fw)
762 return 0;
763
764 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 inbound_region.mode = PCI_BAR_MATCH_MODE;
766 inbound_region.bar = SRAM_BAR_ID;
767 inbound_region.addr = SRAM_BASE_ADDR;
768 rc = hl_pci_set_inbound_region(hdev, region: 0, pci_region: &inbound_region);
769 if (rc)
770 goto done;
771
772 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 inbound_region.mode = PCI_BAR_MATCH_MODE;
774 inbound_region.bar = CFG_BAR_ID;
775 inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 rc = hl_pci_set_inbound_region(hdev, region: 1, pci_region: &inbound_region);
777 if (rc)
778 goto done;
779
780 /* Inbound Region 2 - Bar 4 - Point to HBM */
781 inbound_region.mode = PCI_BAR_MATCH_MODE;
782 inbound_region.bar = HBM_BAR_ID;
783 inbound_region.addr = DRAM_PHYS_BASE;
784 rc = hl_pci_set_inbound_region(hdev, region: 2, pci_region: &inbound_region);
785 if (rc)
786 goto done;
787
788 /* Outbound Region 0 - Point to Host */
789 outbound_region.addr = HOST_PHYS_BASE;
790 outbound_region.size = HOST_PHYS_SIZE;
791 rc = hl_pci_set_outbound_region(hdev, pci_region: &outbound_region);
792
793done:
794 return rc;
795}
796
/* Read the h/w state register; the raw value maps to enum hl_device_hw_state */
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
801
/**
 * gaudi_early_init - early probe-time initialization
 * @hdev: pointer to hl_device structure
 *
 * Sets the fixed ASIC properties, validates the PCI BAR sizes, determines
 * whether the f/w owns the iATU configuration, brings up the PCI layer,
 * reads the preboot status and resets the device if its H/W state is dirty.
 *
 * Return: 0 on success, negative error code otherwise.
 *
 * NOTE(review): labels such as "data:" / "objp:" below look like IDE
 * inlay-hint artifacts rather than valid C - confirm against pristine source.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes - a mismatch means this is not the expected device */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	/* Security is off - query the boot status directly over ELBI */
	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				data: &fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(objp: hdev->asic_prop.hw_queues_props);
	return rc;
}
893
894static int gaudi_early_fini(struct hl_device *hdev)
895{
896 kfree(objp: hdev->asic_prop.hw_queues_props);
897 hl_pci_fini(hdev);
898
899 return 0;
900}
901
902/**
903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904 *
905 * @hdev: pointer to hl_device structure
906 *
907 */
908static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909{
910 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 struct asic_fixed_properties *prop = &hdev->asic_prop;
912 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 int rc;
914
915 if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 struct gaudi_device *gaudi = hdev->asic_specific;
918
919 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 return 0;
921
922 rc = hl_fw_cpucp_pll_info_get(hdev, pll_index: HL_GAUDI_CPU_PLL, pll_freq_arr);
923
924 if (rc)
925 return rc;
926
927 freq = pll_freq_arr[2];
928 } else {
929 /* Backward compatibility */
930 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 nr = RREG32(mmPSOC_CPU_PLL_NR);
933 nf = RREG32(mmPSOC_CPU_PLL_NF);
934 od = RREG32(mmPSOC_CPU_PLL_OD);
935
936 if (div_sel == DIV_SEL_REF_CLK ||
937 div_sel == DIV_SEL_DIVIDED_REF) {
938 if (div_sel == DIV_SEL_REF_CLK)
939 freq = PLL_REF_CLK;
940 else
941 freq = PLL_REF_CLK / (div_fctr + 1);
942 } else if (div_sel == DIV_SEL_PLL_CLK ||
943 div_sel == DIV_SEL_DIVIDED_PLL) {
944 pll_clk = PLL_REF_CLK * (nf + 1) /
945 ((nr + 1) * (od + 1));
946 if (div_sel == DIV_SEL_PLL_CLK)
947 freq = pll_clk;
948 else
949 freq = pll_clk / (div_fctr + 1);
950 } else {
951 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 freq = 0;
953 }
954 }
955
956 prop->psoc_timestamp_frequency = freq;
957 prop->psoc_pci_pll_nr = nr;
958 prop->psoc_pci_pll_nf = nf;
959 prop->psoc_pci_pll_od = od;
960 prop->psoc_pci_pll_div_factor = div_fctr;
961
962 return 0;
963}
964
/*
 * _gaudi_init_tpc_mem - DMA the TPC kernel image to SRAM and run it
 *
 * @hdev: pointer to hl_device structure
 * @tpc_kernel_src_addr: host DMA address holding the TPC kernel image
 * @tpc_kernel_size: size in bytes of the TPC kernel image
 *
 * Builds a single LIN_DMA packet in a kernel CB, submits it on QMAN0 to copy
 * the image to the SRAM user base (8KB aligned, as required by the TPC I$
 * prefetcher), then runs the kernel on every TPC engine.
 *
 * Return: 0 on success, negative error code otherwise.
 *
 * NOTE(review): labels such as "internal_cb:"/"objp:" below look like IDE
 * inlay-hint artifacts rather than valid C - confirm against pristine source.
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, internal_cb: false);
	if (!cb)
		return -EFAULT;

	/* The CB holds exactly one LIN_DMA packet */
	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	/* LIN_DMA opcode with read-barrier and message-barrier set */
	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, queue_type: QUEUE_TYPE_EXT, is_kernel_allocated_cb: true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* The CB is both the user and the patched CB; account a CS reference
	 * on it for the lifetime of the job.
	 */
	job->id = 0;
	job->user_cb = cb;
	atomic_inc(v: &job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	/* Image is now in SRAM - execute it on every TPC engine */
	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, tpc_kernel: dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, userptr_list: &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(objp: job);
	atomic_dec(v: &cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: cb->buf->handle);

	return rc;
}
1041
1042/*
1043 * gaudi_init_tpc_mem() - Initialize TPC memories.
1044 * @hdev: Pointer to hl_device structure.
1045 *
1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047 *
1048 * Return: 0 for success, negative value for error.
1049 */
1050static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051{
1052 const struct firmware *fw;
1053 size_t fw_size;
1054 void *cpu_addr;
1055 dma_addr_t dma_handle;
1056 int rc, count = 5;
1057
1058again:
1059 rc = request_firmware(fw: &fw, GAUDI_TPC_FW_FILE, device: hdev->dev);
1060 if (rc == -EINTR && count-- > 0) {
1061 msleep(msecs: 50);
1062 goto again;
1063 }
1064
1065 if (rc) {
1066 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 GAUDI_TPC_FW_FILE);
1068 goto out;
1069 }
1070
1071 fw_size = fw->size;
1072 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 if (!cpu_addr) {
1074 dev_err(hdev->dev,
1075 "Failed to allocate %zu of dma memory for TPC kernel\n",
1076 fw_size);
1077 rc = -ENOMEM;
1078 goto out;
1079 }
1080
1081 memcpy(cpu_addr, fw->data, fw_size);
1082
1083 rc = _gaudi_init_tpc_mem(hdev, tpc_kernel_src_addr: dma_handle, tpc_kernel_size: fw_size);
1084
1085 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086
1087out:
1088 release_firmware(fw);
1089 return rc;
1090}
1091
1092static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093{
1094 struct gaudi_device *gaudi = hdev->asic_specific;
1095 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 struct hl_hw_queue *q;
1097 u32 i, sob_id, sob_group_id, queue_id;
1098
1099 /* Iterate through SOB groups and assign a SOB for each slave queue */
1100 sob_group_id =
1101 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103
1104 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 }
1109
1110 /* Both DMA5 and TPC7 use the same resources since only a single
1111 * engine need to participate in the reduction process
1112 */
1113 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 q = &hdev->kernel_queues[queue_id];
1115 q->sync_stream_prop.collective_sob_id =
1116 sob_id + NIC_NUMBER_OF_ENGINES;
1117
1118 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 q = &hdev->kernel_queues[queue_id];
1120 q->sync_stream_prop.collective_sob_id =
1121 sob_id + NIC_NUMBER_OF_ENGINES;
1122}
1123
1124static void gaudi_sob_group_hw_reset(struct kref *ref)
1125{
1126 struct gaudi_hw_sob_group *hw_sob_group =
1127 container_of(ref, struct gaudi_hw_sob_group, kref);
1128 struct hl_device *hdev = hw_sob_group->hdev;
1129 int i;
1130
1131 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134
1135 kref_init(kref: &hw_sob_group->kref);
1136}
1137
/* kref release callback used where dropping the last reference indicates a
 * driver logic error - only logs, never resets the SOB group.
 */
static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}
1148
1149static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150{
1151 struct gaudi_collective_properties *prop;
1152 int i;
1153
1154 prop = &gaudi->collective_props;
1155
1156 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157
1158 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 /* Set collective engine bit */
1163 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165}
1166
1167static int gaudi_collective_init(struct hl_device *hdev)
1168{
1169 u32 i, sob_id, reserved_sobs_per_group;
1170 struct gaudi_collective_properties *prop;
1171 struct gaudi_device *gaudi;
1172
1173 gaudi = hdev->asic_specific;
1174 prop = &gaudi->collective_props;
1175 sob_id = hdev->asic_prop.collective_first_sob;
1176
1177 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 reserved_sobs_per_group =
1179 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180
1181 /* Init SOB groups */
1182 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 prop->hw_sob_group[i].hdev = hdev;
1184 prop->hw_sob_group[i].base_sob_id = sob_id;
1185 sob_id += reserved_sobs_per_group;
1186 gaudi_sob_group_hw_reset(ref: &prop->hw_sob_group[i].kref);
1187 }
1188
1189 for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 prop->next_sob_group_val[i] = 1;
1191 prop->curr_sob_group_idx[i] = 0;
1192 gaudi_collective_map_sobs(hdev, stream: i);
1193 }
1194
1195 gaudi_collective_mstr_sob_mask_set(gaudi);
1196
1197 return 0;
1198}
1199
1200static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201{
1202 struct gaudi_device *gaudi = hdev->asic_specific;
1203 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204
1205 kref_put(kref: &cprop->hw_sob_group[sob_group].kref,
1206 release: gaudi_sob_group_hw_reset);
1207}
1208
/*
 * gaudi_collective_master_init_job - build the master queue's wait CBs
 *
 * @hdev: pointer to hl_device structure
 * @job: the collective master job whose patched CB is filled
 * @stream: stream index of the collective wait
 * @sob_group_offset: index of the SOB group used by this collective wait
 *
 * Generates two wait CBs in the job's patched CB: one monitor covers the
 * first HL_MAX_SOBS_PER_MONITOR SOBs (NICs 0-7), the second covers the rest
 * (NICs 8-9 and the reduction engine). Note that wait_prop fields set for
 * the first monitor (data, sob_val, q_idx) are deliberately reused for the
 * second one.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	/* Remember which queue owns this SOB group for later reset handling */
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* First monitor - waits on the first batch of SOBs */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, prop: &wait_prop);

	/* Second monitor - waits on the remaining SOBs of the group */
	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, prop: &wait_prop);
}
1259
/*
 * gaudi_collective_slave_init_job - build a slave queue's wait + signal CB
 *
 * @hdev: pointer to hl_device structure
 * @job: the collective slave job whose user CB is filled
 * @cs_cmpl: completion object carrying the SOB id/value to wait on
 *
 * Each slave waits (via its slave monitor) for the user's signal SOB and then
 * signals its own collective SOB, which the master's monitors aggregate.
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle store earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs: job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, prop: &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* Signal the collective SOB so the master monitors can aggregate */
	cb_size += gaudi_gen_signal_cb(hdev, data: job->user_cb,
			sob_id: prop->collective_sob_id, size: cb_size, eb: false);
}
1307
/*
 * gaudi_collective_wait_init_cs - initialize a collective-wait CS
 *
 * @cs: the collective-wait command submission
 *
 * Copies (or derives, for encapsulated signals) the signal SOB info into the
 * CS completion object, verifies the signal CS has not already completed,
 * builds the master/slave wait CBs for every job in the CS and advances the
 * per-stream SOB group bookkeeping (including wraparound at HL_MAX_SOB_VAL).
 *
 * Return: 0 on success, -EINVAL if the signal CS already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(lock: &signal_cs_cmpl->lock);

	if (completion_done(x: &cs->signal_fence->completion)) {
		spin_unlock(lock: &signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(kref: &cs_cmpl->hw_sob->kref);

	spin_unlock(lock: &signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Build the wait/signal CBs - master vs. slave per queue config */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(kref: &cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(kref: &cprop->hw_sob_group[sob_group_offset].kref,
				release: gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Ensure all CB writes are visible before releasing the signal fence */
	mb();
	hl_fence_put(fence: cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1414
1415static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416{
1417 u32 cacheline_end, additional_commands;
1418
1419 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 additional_commands = sizeof(struct packet_msg_prot) * 2;
1421
1422 if (user_cb_size + additional_commands > cacheline_end)
1423 return cacheline_end - user_cb_size + additional_commands;
1424 else
1425 return additional_commands;
1426}
1427
/*
 * gaudi_collective_wait_create_job - allocate one job of a collective wait CS
 *
 * @hdev: pointer to hl_device structure
 * @ctx: context of the submitting user
 * @cs: the collective-wait command submission
 * @mode: HL_COLLECTIVE_MASTER or HL_COLLECTIVE_SLAVE
 * @queue_id: queue the job will run on
 * @wait_queue_id: the collective master queue id (currently unused here)
 * @encaps_signal_offset: wait offset for encapsulated signals
 *
 * Allocates a job and a kernel CB sized per the master/slave packet layout
 * and links the job into the CS. The CB contents are filled later by
 * gaudi_collective_master_init_job()/gaudi_collective_slave_init_job().
 *
 * Return: 0 on success, negative error code otherwise.
 *
 * NOTE(review): labels such as "objp:"/"mmg:" below look like IDE inlay-hint
 * artifacts rather than valid C - confirm against pristine source.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, queue_type: hw_queue_prop->type, is_kernel_allocated_cb: true);
	if (!job) {
		atomic64_inc(v: &ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(v: &cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, internal_cb: !patched_cb);
	if (!cb) {
		atomic64_inc(v: &ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(v: &cntr->out_of_mem_drop_cnt);
		kfree(objp: job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(v: &job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since its guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(new: &job->cs_node, head: &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
1523
1524static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525 struct hl_ctx *ctx, struct hl_cs *cs,
1526 u32 wait_queue_id, u32 collective_engine_id,
1527 u32 encaps_signal_offset)
1528{
1529 struct gaudi_device *gaudi = hdev->asic_specific;
1530 struct hw_queue_properties *hw_queue_prop;
1531 u32 queue_id, collective_queue, num_jobs;
1532 u32 stream, nic_queue, nic_idx = 0;
1533 bool skip;
1534 int i, rc = 0;
1535
1536 /* Verify wait queue id is configured as master */
1537 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539 dev_err(hdev->dev,
1540 "Queue %d is not configured as collective master\n",
1541 wait_queue_id);
1542 return -EINVAL;
1543 }
1544
1545 /* Verify engine id is supported */
1546 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548 dev_err(hdev->dev,
1549 "Collective wait does not support engine %u\n",
1550 collective_engine_id);
1551 return -EINVAL;
1552 }
1553
1554 stream = wait_queue_id % 4;
1555
1556 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558 else
1559 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560
1561 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563
1564 /* First job goes to the collective master queue, it will wait for
1565 * the collective slave queues to finish execution.
1566 * The synchronization is done using two monitors:
1567 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568 * reduction engine (DMA5/TPC7).
1569 *
1570 * Rest of the jobs goes to the collective slave queues which will
1571 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572 */
1573 for (i = 0 ; i < num_jobs ; i++) {
1574 if (i == 0) {
1575 queue_id = wait_queue_id;
1576 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577 mode: HL_COLLECTIVE_MASTER, queue_id,
1578 wait_queue_id, encaps_signal_offset);
1579 } else {
1580 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581 if (gaudi->hw_cap_initialized &
1582 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583 skip = false;
1584 else
1585 skip = true;
1586
1587 queue_id = nic_queue;
1588 nic_queue += 4;
1589 nic_idx++;
1590
1591 if (skip)
1592 continue;
1593 } else {
1594 queue_id = collective_queue;
1595 }
1596
1597 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598 mode: HL_COLLECTIVE_SLAVE, queue_id,
1599 wait_queue_id, encaps_signal_offset);
1600 }
1601
1602 if (rc)
1603 return rc;
1604 }
1605
1606 return rc;
1607}
1608
/*
 * gaudi_late_init - late-stage device initialization (f/w is already up)
 *
 * @hdev: pointer to hl_device structure
 *
 * Fetches CPUCP info, disables the unused NIC QMANs on PCI cards, enables
 * f/w PCI access, scrubs device memory, clears the MMU page tables, loads
 * the TPC kernel, sets up collective waits and prepares the single user ASID.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	/* From here on, failures must revoke the f/w PCI access grant */
	rc = hl_fw_send_pci_access_msg(hdev, opcode: CPUCP_PACKET_ENABLE_PCI_ACCESS, value: 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, asid: 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, opcode: CPUCP_PACKET_DISABLE_PCI_ACCESS, value: 0x0);

	return rc;
}
1688
/* Late-stage teardown - releases the hwmon resources */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1693
/*
 * gaudi_alloc_cpu_accessible_dma_mem - allocate the CPU-accessible DMA region
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates HL_CPU_ACCESSIBLE_MEM_SIZE bytes of DMA-coherent memory whose
 * start and end share the same PCI MSB bits, retrying up to
 * GAUDI_ALLOC_CPU_MEM_RETRY_CNT times. Failed candidates (indices < i) are
 * freed on exit; the winning allocation (index i) is kept.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		/* Accept the allocation only if its MSBs match end-to-end */
		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	/* With f/w security the address translation is done by the f/w */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	/* Free only the rejected candidates; index i (if valid) is kept */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
1746
1747static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1748{
1749 struct gaudi_device *gaudi = hdev->asic_specific;
1750 struct gaudi_internal_qman_info *q;
1751 u32 i;
1752
1753 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1754 q = &gaudi->internal_qmans[i];
1755 if (!q->pq_kernel_addr)
1756 continue;
1757 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1758 }
1759}
1760
1761static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1762{
1763 struct gaudi_device *gaudi = hdev->asic_specific;
1764 struct gaudi_internal_qman_info *q;
1765 int rc, i;
1766
1767 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1768 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1769 continue;
1770
1771 q = &gaudi->internal_qmans[i];
1772
1773 switch (i) {
1774 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1775 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1776 break;
1777 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1778 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1779 break;
1780 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1781 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1782 break;
1783 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1784 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1785 break;
1786 default:
1787 dev_err(hdev->dev, "Bad internal queue index %d", i);
1788 rc = -EINVAL;
1789 goto free_internal_qmans_pq_mem;
1790 }
1791
1792 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1793 GFP_KERNEL | __GFP_ZERO);
1794 if (!q->pq_kernel_addr) {
1795 rc = -ENOMEM;
1796 goto free_internal_qmans_pq_mem;
1797 }
1798 }
1799
1800 return 0;
1801
1802free_internal_qmans_pq_mem:
1803 gaudi_free_internal_qmans_pq_mem(hdev);
1804 return rc;
1805}
1806
1807static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1808{
1809 struct asic_fixed_properties *prop = &hdev->asic_prop;
1810 struct pci_mem_region *region;
1811
1812 /* CFG */
1813 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1814 region->region_base = CFG_BASE;
1815 region->region_size = CFG_SIZE;
1816 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1817 region->bar_size = CFG_BAR_SIZE;
1818 region->bar_id = CFG_BAR_ID;
1819 region->used = 1;
1820
1821 /* SRAM */
1822 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1823 region->region_base = SRAM_BASE_ADDR;
1824 region->region_size = SRAM_SIZE;
1825 region->offset_in_bar = 0;
1826 region->bar_size = SRAM_BAR_SIZE;
1827 region->bar_id = SRAM_BAR_ID;
1828 region->used = 1;
1829
1830 /* DRAM */
1831 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1832 region->region_base = DRAM_PHYS_BASE;
1833 region->region_size = hdev->asic_prop.dram_size;
1834 region->offset_in_bar = 0;
1835 region->bar_size = prop->dram_pci_bar_size;
1836 region->bar_id = HBM_BAR_ID;
1837 region->used = 1;
1838
1839 /* SP SRAM */
1840 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1841 region->region_base = PSOC_SCRATCHPAD_ADDR;
1842 region->region_size = PSOC_SCRATCHPAD_SIZE;
1843 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1844 region->bar_size = CFG_BAR_SIZE;
1845 region->bar_id = CFG_BAR_ID;
1846 region->used = 1;
1847}
1848
1849static int gaudi_sw_init(struct hl_device *hdev)
1850{
1851 struct gaudi_device *gaudi;
1852 u32 i, event_id = 0;
1853 int rc;
1854
1855 /* Allocate device structure */
1856 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1857 if (!gaudi)
1858 return -ENOMEM;
1859
1860 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1861 if (gaudi_irq_map_table[i].valid) {
1862 if (event_id == GAUDI_EVENT_SIZE) {
1863 dev_err(hdev->dev,
1864 "Event array exceeds the limit of %u events\n",
1865 GAUDI_EVENT_SIZE);
1866 rc = -EINVAL;
1867 goto free_gaudi_device;
1868 }
1869
1870 gaudi->events[event_id++] =
1871 gaudi_irq_map_table[i].fc_id;
1872 }
1873 }
1874
1875 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1876
1877 hdev->asic_specific = gaudi;
1878
1879 /* Create DMA pool for small allocations */
1880 hdev->dma_pool = dma_pool_create(name: dev_name(dev: hdev->dev),
1881 dev: &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, align: 8, boundary: 0);
1882 if (!hdev->dma_pool) {
1883 dev_err(hdev->dev, "failed to create DMA pool\n");
1884 rc = -ENOMEM;
1885 goto free_gaudi_device;
1886 }
1887
1888 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1889 if (rc)
1890 goto free_dma_pool;
1891
1892 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1893 if (!hdev->cpu_accessible_dma_pool) {
1894 dev_err(hdev->dev,
1895 "Failed to create CPU accessible DMA pool\n");
1896 rc = -ENOMEM;
1897 goto free_cpu_dma_mem;
1898 }
1899
1900 rc = gen_pool_add(pool: hdev->cpu_accessible_dma_pool,
1901 addr: (uintptr_t) hdev->cpu_accessible_dma_mem,
1902 HL_CPU_ACCESSIBLE_MEM_SIZE, nid: -1);
1903 if (rc) {
1904 dev_err(hdev->dev,
1905 "Failed to add memory to CPU accessible DMA pool\n");
1906 rc = -EFAULT;
1907 goto free_cpu_accessible_dma_pool;
1908 }
1909
1910 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1911 if (rc)
1912 goto free_cpu_accessible_dma_pool;
1913
1914 spin_lock_init(&gaudi->hw_queues_lock);
1915
1916 hdev->supports_sync_stream = true;
1917 hdev->supports_coresight = true;
1918 hdev->supports_staged_submission = true;
1919 hdev->supports_wait_for_multi_cs = true;
1920
1921 hdev->asic_funcs->set_pci_memory_regions(hdev);
1922 hdev->stream_master_qid_arr =
1923 hdev->asic_funcs->get_stream_master_qid_arr();
1924 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1925
1926 return 0;
1927
1928free_cpu_accessible_dma_pool:
1929 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1930free_cpu_dma_mem:
1931 if (!hdev->asic_prop.fw_security_enabled)
1932 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1933 hdev->cpu_pci_msb_addr);
1934 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1935 hdev->cpu_accessible_dma_address);
1936free_dma_pool:
1937 dma_pool_destroy(pool: hdev->dma_pool);
1938free_gaudi_device:
1939 kfree(objp: gaudi);
1940 return rc;
1941}
1942
1943static int gaudi_sw_fini(struct hl_device *hdev)
1944{
1945 struct gaudi_device *gaudi = hdev->asic_specific;
1946
1947 gaudi_free_internal_qmans_pq_mem(hdev);
1948
1949 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1950
1951 if (!hdev->asic_prop.fw_security_enabled)
1952 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1953 hdev->cpu_pci_msb_addr);
1954
1955 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1956 hdev->cpu_accessible_dma_address);
1957
1958 dma_pool_destroy(pool: hdev->dma_pool);
1959
1960 kfree(objp: gaudi);
1961
1962 return 0;
1963}
1964
1965static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1966{
1967 struct hl_device *hdev = arg;
1968 int i;
1969
1970 if (hdev->disabled)
1971 return IRQ_HANDLED;
1972
1973 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1974 hl_irq_handler_cq(irq, arg: &hdev->completion_queue[i]);
1975
1976 hl_irq_handler_eq(irq, arg: &hdev->event_queue);
1977
1978 return IRQ_HANDLED;
1979}
1980
1981/*
1982 * For backward compatibility, new MSI interrupts should be set after the
1983 * existing CPU and NIC interrupts.
1984 */
1985static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1986 bool cpu_eq)
1987{
1988 int msi_vec;
1989
1990 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1991 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1992 GAUDI_EVENT_QUEUE_MSI_IDX);
1993
1994 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1995 (nr + NIC_NUMBER_OF_ENGINES + 1);
1996
1997 return pci_irq_vector(dev: hdev->pdev, nr: msi_vec);
1998}
1999
2000static int gaudi_enable_msi_single(struct hl_device *hdev)
2001{
2002 int rc, irq;
2003
2004 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2005
2006 irq = gaudi_pci_irq_vector(hdev, nr: 0, cpu_eq: false);
2007 rc = request_irq(irq, handler: gaudi_irq_handler_single, flags: 0,
2008 name: "gaudi single msi", dev: hdev);
2009 if (rc)
2010 dev_err(hdev->dev,
2011 "Failed to request single MSI IRQ\n");
2012
2013 return rc;
2014}
2015
2016static int gaudi_enable_msi(struct hl_device *hdev)
2017{
2018 struct gaudi_device *gaudi = hdev->asic_specific;
2019 int rc;
2020
2021 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2022 return 0;
2023
2024 rc = pci_alloc_irq_vectors(dev: hdev->pdev, min_vecs: 1, max_vecs: 1, PCI_IRQ_MSI);
2025 if (rc < 0) {
2026 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2027 return rc;
2028 }
2029
2030 rc = gaudi_enable_msi_single(hdev);
2031 if (rc)
2032 goto free_pci_irq_vectors;
2033
2034 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2035
2036 return 0;
2037
2038free_pci_irq_vectors:
2039 pci_free_irq_vectors(dev: hdev->pdev);
2040 return rc;
2041}
2042
2043static void gaudi_sync_irqs(struct hl_device *hdev)
2044{
2045 struct gaudi_device *gaudi = hdev->asic_specific;
2046
2047 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2048 return;
2049
2050 /* Wait for all pending IRQs to be finished */
2051 synchronize_irq(irq: gaudi_pci_irq_vector(hdev, nr: 0, cpu_eq: false));
2052}
2053
2054static void gaudi_disable_msi(struct hl_device *hdev)
2055{
2056 struct gaudi_device *gaudi = hdev->asic_specific;
2057
2058 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2059 return;
2060
2061 gaudi_sync_irqs(hdev);
2062 free_irq(gaudi_pci_irq_vector(hdev, nr: 0, cpu_eq: false), hdev);
2063 pci_free_irq_vectors(dev: hdev->pdev);
2064
2065 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2066}
2067
/**
 * gaudi_init_scrambler_sram() - enable the SRAM scrambler in all routers
 * @hdev: habanalabs device structure.
 *
 * Skipped when the FW is secured (driver has no register access), when the
 * FW reports it already enabled SRAM scrambling, or when the driver already
 * did it (HW_CAP_SRAM_SCRAMBLER set). Otherwise, set the scrambler enable
 * bit in all NIF/SIF routers and DMA_IF down channels.
 */
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	/* NIF routers */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* SIF routers */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* DMA_IF down channels */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2135
/**
 * gaudi_init_scrambler_hbm() - enable the HBM scrambler in all routers
 * @hdev: habanalabs device structure.
 *
 * Skipped when the FW is secured, when the boot-fit FW reports DRAM
 * scrambling already enabled, or when the driver already enabled it
 * (HW_CAP_HBM_SCRAMBLER set). Otherwise, set the scrambler enable bit in
 * all NIF/SIF routers and DMA_IF down channels.
 */
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	/* NIF routers */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* SIF routers */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* DMA_IF down channels */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
2203
/**
 * gaudi_init_e2e() - program end-to-end credits in all routers
 * @hdev: habanalabs device structure.
 *
 * Program per-router HBM/PCI read and write credit sizes for the SIF/NIF
 * routers and the DMA_IF down channels, then enable E2E credits everywhere.
 * Skipped when the FW is secured or when the boot-fit FW reports it already
 * configured the E2E credits. The magic values are tuning constants; HBM
 * sizes are expressed in 8-byte units, hence the ">> 3".
 * NOTE(review): the sizes must be programmed before the matching _EN
 * registers are set - keep the write order intact.
 */
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	/* SIF router credit sizes */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	/* NIF router credit sizes */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	/* DMA_IF down channel credit sizes */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	/* Enable E2E credits - SIF routers */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	/* Enable E2E credits - NIF routers */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	/* Enable E2E credits - DMA_IF down channels */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
2453
/**
 * gaudi_init_hbm_cred() - program HBM read/write credit counters
 * @hdev: habanalabs device structure.
 *
 * Write the HBM0/HBM1 read and write credit count patterns to all four
 * DMA_IF quadrants and then enable read+write credits on both credit-enable
 * registers of each quadrant. Skipped when the FW is secured or when the
 * boot-fit FW reports it already configured the HBM credits.
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	/* Per-nibble credit patterns (tuning constants) */
	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable read + write credits in both credit-enable registers */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
2516
2517static void gaudi_init_golden_registers(struct hl_device *hdev)
2518{
2519 u32 tpc_offset;
2520 int tpc_id, i;
2521
2522 gaudi_init_e2e(hdev);
2523 gaudi_init_hbm_cred(hdev);
2524
2525 for (tpc_id = 0, tpc_offset = 0;
2526 tpc_id < TPC_NUMBER_OF_ENGINES;
2527 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2528 /* Mask all arithmetic interrupts from TPC */
2529 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2530 /* Set 16 cache lines */
2531 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2532 ICACHE_FETCH_LINE_NUM, 2);
2533 }
2534
2535 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2536 for (i = 0 ; i < 128 ; i += 8)
2537 writeq(val: 0, addr: hdev->pcie_bar[SRAM_BAR_ID] + i);
2538
2539 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543}
2544
/**
 * gaudi_init_pci_dma_qman() - configure one stream of a PCI DMA QMAN
 * @hdev: habanalabs device structure.
 * @dma_id: DMA channel index (selects the QMAN register block).
 * @qman_id: stream index inside the QMAN (0-3).
 * @qman_pq_addr: DMA address of the stream's PQ buffer on the host.
 *
 * Program the PQ base/size/pointers, the LDMA offsets, the four sync-manager
 * message base addresses and the barrier config for the given stream. For
 * stream 0 only, also configure the per-QMAN error reporting (RAZWI IRQ),
 * arbitration error message, watchdog timeout and protection bits.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Sync-manager monitor payload and sync-object base addresses,
	 * for both the east-north and west-south sync managers.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
2635
/**
 * gaudi_init_dma_core() - configure and enable one DMA core
 * @hdev: habanalabs device structure.
 * @dma_id: DMA channel index (selects the core register block).
 *
 * Set the read outstanding/size limits, apply the H3-2116 workaround,
 * configure error reporting (with optional stop-on-error), set the secure
 * properties to MMU bypass, and finally enable the DMA core.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
2675
2676static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677 u32 enable_mask)
2678{
2679 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680
2681 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682}
2683
2684static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685{
2686 struct gaudi_device *gaudi = hdev->asic_specific;
2687 struct hl_hw_queue *q;
2688 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689
2690 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691 return;
2692
2693 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694 dma_id = gaudi_dma_assignment[i];
2695 /*
2696 * For queues after the CPU Q need to add 1 to get the correct
2697 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2698 * order to get the correct MSI register.
2699 */
2700 if (dma_id > 1) {
2701 cpu_skip = 1;
2702 nic_skip = NIC_NUMBER_OF_ENGINES;
2703 } else {
2704 cpu_skip = 0;
2705 nic_skip = 0;
2706 }
2707
2708 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709 q_idx = 4 * dma_id + j + cpu_skip;
2710 q = &hdev->kernel_queues[q_idx];
2711 q->cq_id = cq_id++;
2712 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713 gaudi_init_pci_dma_qman(hdev, dma_id, qman_id: j,
2714 qman_pq_addr: q->bus_address);
2715 }
2716
2717 gaudi_init_dma_core(hdev, dma_id);
2718
2719 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720 }
2721
2722 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723}
2724
/*
 * gaudi_init_hbm_dma_qman() - initialize one stream of an HBM DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel being configured.
 * @qman_id: stream index; 0-3 are the upper CPs, 4 is the lower CP.
 * @qman_base_addr: DMA address of the stream's PQ buffer for upper-CP
 *                  streams; callers pass 0 for the lower CP, which has no PQ.
 *
 * Upper-CP streams get their PQ base/size/pointers and CP-DMA offsets
 * programmed; the lower CP instead gets the RAZWI/error IRQ routing,
 * arbitration error reporting, watchdog timeout and protection settings.
 * All streams get the sync-manager monitor/SOB message base addresses.
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* East-north and west-south sync manager monitor/SOB addresses */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: program the PQ location and size */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Error IRQ goes to the GIC or to the FW-provided handler */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the failing DMA engine */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
2829
2830static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2831{
2832 struct gaudi_device *gaudi = hdev->asic_specific;
2833 struct gaudi_internal_qman_info *q;
2834 u64 qman_base_addr;
2835 int i, j, dma_id, internal_q_index;
2836
2837 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2838 return;
2839
2840 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2841 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2842
2843 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2844 /*
2845 * Add the CPU queue in order to get the correct queue
2846 * number as all internal queue are placed after it
2847 */
2848 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2849
2850 q = &gaudi->internal_qmans[internal_q_index];
2851 qman_base_addr = (u64) q->pq_dma_addr;
2852 gaudi_init_hbm_dma_qman(hdev, dma_id, qman_id: j,
2853 qman_base_addr);
2854 }
2855
2856 /* Initializing lower CP for HBM DMA QMAN */
2857 gaudi_init_hbm_dma_qman(hdev, dma_id, qman_id: 4, qman_base_addr: 0);
2858
2859 gaudi_init_dma_core(hdev, dma_id);
2860
2861 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2862 }
2863
2864 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2865}
2866
/*
 * gaudi_init_mme_qman() - initialize one stream of an MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register offset of the target MME QMAN relative to MME0.
 * @qman_id: stream index; 0-3 are the upper CPs, 4 is the lower CP.
 * @qman_base_addr: DMA address of the stream's PQ buffer for upper-CP
 *                  streams; callers pass 0 for the lower CP.
 *
 * Upper-CP streams get their PQ base/size/pointers and CP-DMA offsets;
 * the lower CP gets the RAZWI/error IRQ routing, arbitration error
 * reporting, watchdog timeout and protection settings. All streams get
 * the sync-manager monitor/SOB message base addresses.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: program the PQ location and size */
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Error IRQ goes to the GIC or to the FW-provided handler */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the failing MME */
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
2953
2954static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955{
2956 struct gaudi_device *gaudi = hdev->asic_specific;
2957 struct gaudi_internal_qman_info *q;
2958 u64 qman_base_addr;
2959 u32 mme_offset;
2960 int i, internal_q_index;
2961
2962 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963 return;
2964
2965 /*
2966 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968 */
2969
2970 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974 q = &gaudi->internal_qmans[internal_q_index];
2975 qman_base_addr = (u64) q->pq_dma_addr;
2976 gaudi_init_mme_qman(hdev, mme_offset, qman_id: (i & 0x3),
2977 qman_base_addr);
2978 if (i == 3)
2979 mme_offset = 0;
2980 }
2981
2982 /* Initializing lower CP for MME QMANs */
2983 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984 gaudi_init_mme_qman(hdev, mme_offset, qman_id: 4, qman_base_addr: 0);
2985 gaudi_init_mme_qman(hdev, mme_offset: 0, qman_id: 4, qman_base_addr: 0);
2986
2987 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990 gaudi->hw_cap_initialized |= HW_CAP_MME;
2991}
2992
/*
 * gaudi_init_tpc_qman() - initialize one stream of a TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register offset of the target TPC QMAN relative to TPC0.
 * @qman_id: stream index; 0-3 are the upper CPs, 4 is the lower CP.
 * @qman_base_addr: DMA address of the stream's PQ buffer for upper-CP
 *                  streams; callers pass 0 for the lower CP.
 *
 * Upper-CP streams get their PQ base/size/pointers and CP-DMA offsets;
 * the lower CP gets the RAZWI/error IRQ routing, arbitration error
 * reporting, watchdog timeout and protection settings. All streams get
 * the sync-manager monitor/SOB message base addresses.
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = tpc_offset + qman_id * 4;

	/* Derive the engine index back from the register offset */
	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		/* Upper CP: program the PQ location and size */
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Error IRQ goes to the GIC or to the FW-provided handler */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the failing TPC */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	/*
	 * NOTE(review): comment above says TPC7 but the condition checks
	 * tpc_id == 6 — confirm which engine is intended.
	 */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
3098
3099static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3100{
3101 struct gaudi_device *gaudi = hdev->asic_specific;
3102 struct gaudi_internal_qman_info *q;
3103 u64 qman_base_addr;
3104 u32 so_base_hi, tpc_offset = 0;
3105 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3106 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3107 int i, tpc_id, internal_q_index;
3108
3109 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3110 return;
3111
3112 so_base_hi = upper_32_bits(CFG_BASE +
3113 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114
3115 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3116 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3117 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3118 tpc_id * QMAN_STREAMS + i;
3119 q = &gaudi->internal_qmans[internal_q_index];
3120 qman_base_addr = (u64) q->pq_dma_addr;
3121 gaudi_init_tpc_qman(hdev, tpc_offset, qman_id: i,
3122 qman_base_addr);
3123
3124 if (i == 3) {
3125 /* Initializing lower CP for TPC QMAN */
3126 gaudi_init_tpc_qman(hdev, tpc_offset, qman_id: 4, qman_base_addr: 0);
3127
3128 /* Enable the QMAN and TPC channel */
3129 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3130 QMAN_TPC_ENABLE);
3131 }
3132 }
3133
3134 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3135 so_base_hi);
3136
3137 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3138
3139 gaudi->hw_cap_initialized |=
3140 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3141 }
3142}
3143
/*
 * gaudi_init_nic_qman() - initialize one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register offset of the target NIC QMAN relative to NIC0 QM0.
 * @qman_id: stream index (0-3).
 * @qman_base_addr: DMA address of the stream's PQ buffer.
 * @nic_id: index of the NIC engine, used in the error payload.
 *
 * Programs the PQ, LDMA offsets and sync-manager message bases for every
 * stream; the per-QMAN error IRQ routing, arbitration and protection
 * registers are programmed only once, when configuring stream 0.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	/* Lower halves are masked to 32 bits before the add */
	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* Per-QMAN error configuration is done once, with stream 0 */
	if (qman_id == 0) {
		/* Error IRQ goes to the GIC or to the FW-provided handler */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the failing NIC */
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
3231
3232static void gaudi_init_nic_qmans(struct hl_device *hdev)
3233{
3234 struct gaudi_device *gaudi = hdev->asic_specific;
3235 struct gaudi_internal_qman_info *q;
3236 u64 qman_base_addr;
3237 u32 nic_offset = 0;
3238 u32 nic_delta_between_qmans =
3239 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3240 u32 nic_delta_between_nics =
3241 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242 int i, nic_id, internal_q_index;
3243
3244 if (!hdev->nic_ports_mask)
3245 return;
3246
3247 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3248 return;
3249
3250 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3251
3252 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3253 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3254 nic_offset += nic_delta_between_qmans;
3255 if (nic_id & 1) {
3256 nic_offset -= (nic_delta_between_qmans * 2);
3257 nic_offset += nic_delta_between_nics;
3258 }
3259 continue;
3260 }
3261
3262 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3263 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3264 nic_id * QMAN_STREAMS + i;
3265 q = &gaudi->internal_qmans[internal_q_index];
3266 qman_base_addr = (u64) q->pq_dma_addr;
3267 gaudi_init_nic_qman(hdev, nic_offset, qman_id: (i & 0x3),
3268 qman_base_addr, nic_id);
3269 }
3270
3271 /* Enable the QMAN */
3272 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3273
3274 nic_offset += nic_delta_between_qmans;
3275 if (nic_id & 1) {
3276 nic_offset -= (nic_delta_between_qmans * 2);
3277 nic_offset += nic_delta_between_nics;
3278 }
3279
3280 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3281 }
3282}
3283
3284static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3285{
3286 struct gaudi_device *gaudi = hdev->asic_specific;
3287
3288 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3289 return;
3290
3291 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3292 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3293 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3294}
3295
3296static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3297{
3298 struct gaudi_device *gaudi = hdev->asic_specific;
3299
3300 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3301 return;
3302
3303 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3304 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3305 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3306 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3307 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3308}
3309
3310static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3311{
3312 struct gaudi_device *gaudi = hdev->asic_specific;
3313
3314 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3315 return;
3316
3317 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3318 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3319}
3320
3321static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3322{
3323 struct gaudi_device *gaudi = hdev->asic_specific;
3324 u32 tpc_offset = 0;
3325 int tpc_id;
3326
3327 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3328 return;
3329
3330 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3331 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3332 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3333 }
3334}
3335
3336static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3337{
3338 struct gaudi_device *gaudi = hdev->asic_specific;
3339 u32 nic_mask, nic_offset = 0;
3340 u32 nic_delta_between_qmans =
3341 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3342 u32 nic_delta_between_nics =
3343 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 int nic_id;
3345
3346 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3347 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3348
3349 if (gaudi->hw_cap_initialized & nic_mask)
3350 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3351
3352 nic_offset += nic_delta_between_qmans;
3353 if (nic_id & 1) {
3354 nic_offset -= (nic_delta_between_qmans * 2);
3355 nic_offset += nic_delta_between_nics;
3356 }
3357 }
3358}
3359
3360static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3361{
3362 struct gaudi_device *gaudi = hdev->asic_specific;
3363
3364 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3365 return;
3366
3367 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3368 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371}
3372
3373static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3374{
3375 struct gaudi_device *gaudi = hdev->asic_specific;
3376
3377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378 return;
3379
3380 /* Stop CPs of HBM DMA QMANs */
3381
3382 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387}
3388
3389static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3390{
3391 struct gaudi_device *gaudi = hdev->asic_specific;
3392
3393 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3394 return;
3395
3396 /* Stop CPs of MME QMANs */
3397 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399}
3400
3401static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3402{
3403 struct gaudi_device *gaudi = hdev->asic_specific;
3404
3405 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3406 return;
3407
3408 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416}
3417
3418static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419{
3420 struct gaudi_device *gaudi = hdev->asic_specific;
3421
3422 /* Stop upper CPs of QMANs */
3423
3424 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425 WREG32(mmNIC0_QM0_GLBL_CFG1,
3426 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429
3430 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431 WREG32(mmNIC0_QM1_GLBL_CFG1,
3432 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435
3436 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437 WREG32(mmNIC1_QM0_GLBL_CFG1,
3438 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441
3442 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443 WREG32(mmNIC1_QM1_GLBL_CFG1,
3444 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447
3448 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449 WREG32(mmNIC2_QM0_GLBL_CFG1,
3450 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453
3454 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455 WREG32(mmNIC2_QM1_GLBL_CFG1,
3456 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459
3460 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461 WREG32(mmNIC3_QM0_GLBL_CFG1,
3462 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465
3466 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467 WREG32(mmNIC3_QM1_GLBL_CFG1,
3468 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471
3472 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473 WREG32(mmNIC4_QM0_GLBL_CFG1,
3474 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477
3478 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479 WREG32(mmNIC4_QM1_GLBL_CFG1,
3480 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483}
3484
3485static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486{
3487 struct gaudi_device *gaudi = hdev->asic_specific;
3488
3489 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490 return;
3491
3492 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495}
3496
3497static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498{
3499 struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502 return;
3503
3504 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509}
3510
3511static void gaudi_mme_stall(struct hl_device *hdev)
3512{
3513 struct gaudi_device *gaudi = hdev->asic_specific;
3514
3515 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516 return;
3517
3518 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535}
3536
3537static void gaudi_tpc_stall(struct hl_device *hdev)
3538{
3539 struct gaudi_device *gaudi = hdev->asic_specific;
3540
3541 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542 return;
3543
3544 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552}
3553
3554static void gaudi_disable_clock_gating(struct hl_device *hdev)
3555{
3556 u32 qman_offset;
3557 int i;
3558
3559 if (hdev->asic_prop.fw_security_enabled)
3560 return;
3561
3562 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3563 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3564 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3565
3566 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3567 }
3568
3569 WREG32(mmMME0_QM_CGM_CFG, 0);
3570 WREG32(mmMME0_QM_CGM_CFG1, 0);
3571 WREG32(mmMME2_QM_CGM_CFG, 0);
3572 WREG32(mmMME2_QM_CGM_CFG1, 0);
3573
3574 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3575 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3576 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3577
3578 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3579 }
3580}
3581
3582static void gaudi_enable_timestamp(struct hl_device *hdev)
3583{
3584 /* Disable the timestamp counter */
3585 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3586
3587 /* Zero the lower/upper parts of the 64-bit counter */
3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3589 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3590
3591 /* Enable the counter */
3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3593}
3594
/* Stop the free-running device timestamp counter */
static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
3600
3601static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3602{
3603 u32 wait_timeout_ms;
3604
3605 if (hdev->pldm)
3606 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3607 else
3608 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3609
3610 if (fw_reset)
3611 goto skip_engines;
3612
3613 gaudi_stop_nic_qmans(hdev);
3614 gaudi_stop_mme_qmans(hdev);
3615 gaudi_stop_tpc_qmans(hdev);
3616 gaudi_stop_hbm_dma_qmans(hdev);
3617 gaudi_stop_pci_dma_qmans(hdev);
3618
3619 msleep(msecs: wait_timeout_ms);
3620
3621 gaudi_pci_dma_stall(hdev);
3622 gaudi_hbm_dma_stall(hdev);
3623 gaudi_tpc_stall(hdev);
3624 gaudi_mme_stall(hdev);
3625
3626 msleep(msecs: wait_timeout_ms);
3627
3628 gaudi_disable_nic_qmans(hdev);
3629 gaudi_disable_mme_qmans(hdev);
3630 gaudi_disable_tpc_qmans(hdev);
3631 gaudi_disable_hbm_dma_qmans(hdev);
3632 gaudi_disable_pci_dma_qmans(hdev);
3633
3634 gaudi_disable_timestamp(hdev);
3635
3636skip_engines:
3637 gaudi_disable_msi(hdev);
3638}
3639
3640static int gaudi_mmu_init(struct hl_device *hdev)
3641{
3642 struct asic_fixed_properties *prop = &hdev->asic_prop;
3643 struct gaudi_device *gaudi = hdev->asic_specific;
3644 u64 hop0_addr;
3645 int rc, i;
3646
3647 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3648 return 0;
3649
3650 for (i = 0 ; i < prop->max_asid ; i++) {
3651 hop0_addr = prop->mmu_pgt_addr +
3652 (i * prop->dmmu.hop_table_size);
3653
3654 rc = gaudi_mmu_update_asid_hop0_addr(hdev, asid: i, phys_addr: hop0_addr);
3655 if (rc) {
3656 dev_err(hdev->dev,
3657 "failed to set hop0 addr for asid %d\n", i);
3658 return rc;
3659 }
3660 }
3661
3662 /* init MMU cache manage page */
3663 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3664 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3665
3666 /* mem cache invalidation */
3667 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3668
3669 rc = hl_mmu_invalidate_cache(hdev, is_hard: true, flags: 0);
3670 if (rc)
3671 return rc;
3672
3673 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3674 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3675
3676 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3677
3678 /*
3679 * The H/W expects the first PI after init to be 1. After wraparound
3680 * we'll write 0.
3681 */
3682 gaudi->mmu_cache_inv_pi = 1;
3683
3684 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3685
3686 return 0;
3687}
3688
3689static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3690{
3691 void __iomem *dst;
3692
3693 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3694
3695 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, src_offset: 0, size: 0);
3696}
3697
3698static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3699{
3700 void __iomem *dst;
3701
3702 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3703
3704 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, src_offset: 0, size: 0);
3705}
3706
3707static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3708{
3709 struct dynamic_fw_load_mgr *dynamic_loader;
3710 struct cpu_dyn_regs *dyn_regs;
3711
3712 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3713
3714 /*
3715 * here we update initial values for few specific dynamic regs (as
3716 * before reading the first descriptor from FW those value has to be
3717 * hard-coded) in later stages of the protocol those values will be
3718 * updated automatically by reading the FW descriptor so data there
3719 * will always be up-to-date
3720 */
3721 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3722 dyn_regs->kmd_msg_to_cpu =
3723 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3724 dyn_regs->cpu_cmd_status_to_host =
3725 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3726
3727 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3728}
3729
3730static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3731{
3732 struct static_fw_load_mgr *static_loader;
3733
3734 static_loader = &hdev->fw_loader.static_loader;
3735
3736 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3739 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3740 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3741 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3742 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3743 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3744 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3745 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3746 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3747 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3748 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3749 GAUDI_PLDM_RESET_WAIT_MSEC :
3750 GAUDI_CPU_RESET_WAIT_MSEC;
3751}
3752
3753static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3754{
3755 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3756
3757 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3758 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3759 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3760 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3761 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3762 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3763}
3764
3765static void gaudi_init_firmware_loader(struct hl_device *hdev)
3766{
3767 struct asic_fixed_properties *prop = &hdev->asic_prop;
3768 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3769
3770 /* fill common fields */
3771 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3772 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3773 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3774 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3775 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3776 fw_loader->skip_bmc = !hdev->bmc_enable;
3777 fw_loader->sram_bar_id = SRAM_BAR_ID;
3778 fw_loader->dram_bar_id = HBM_BAR_ID;
3779
3780 if (prop->dynamic_fw_load)
3781 gaudi_init_dynamic_firmware_loader(hdev);
3782 else
3783 gaudi_init_static_firmware_loader(hdev);
3784}
3785
3786static int gaudi_init_cpu(struct hl_device *hdev)
3787{
3788 struct gaudi_device *gaudi = hdev->asic_specific;
3789 int rc;
3790
3791 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3792 return 0;
3793
3794 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3795 return 0;
3796
3797 /*
3798 * The device CPU works with 40 bits addresses.
3799 * This register sets the extension to 50 bits.
3800 */
3801 if (!hdev->asic_prop.fw_security_enabled)
3802 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3803
3804 rc = hl_fw_init_cpu(hdev);
3805
3806 if (rc)
3807 return rc;
3808
3809 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3810
3811 return 0;
3812}
3813
/*
 * gaudi_init_cpu_queues() - Hand the CPU PQ/EQ/CQ addresses to the device
 * CPU and wait until it acknowledges readiness.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: max time in usec to wait for the device CPU acknowledgment.
 *
 * Return: 0 on success (or when CPU queues are disabled/already up),
 * -EIO on handshake timeout.
 */
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Publish the host bus addresses of the PQ, EQ and CQ regions */
	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	/* Kick the device CPU so it processes the QUEUE_INIT request */
	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	/* Poll until the device CPU reports it is ready for the host */
	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
3886
/*
 * gaudi_pre_hw_init() - Early device setup that must run before H/W init:
 * verify the device responds, secure PCI bar access (when the driver owns
 * security) and mark the device as dirty.
 */
static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}
3914
/*
 * gaudi_hw_init() - Full H/W bring-up: HBM bar, device CPU, scramblers,
 * golden registers, MMU, security, all QMANs, timestamp, MSI and CPU queues.
 * @hdev: habanalabs device structure.
 *
 * The ordering below is deliberate (e.g. SRAM scrambler only after the CPU
 * runs from HBM, MSI before CPU queues and NICs).
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 */
	gaudi_disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	/* Bring up all queue managers */
	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}
4002
4003static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4004{
4005 struct cpu_dyn_regs *dyn_regs =
4006 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4008 struct gaudi_device *gaudi = hdev->asic_specific;
4009 bool driver_performs_reset;
4010
4011 if (!hard_reset) {
4012 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4013 return 0;
4014 }
4015
4016 if (hdev->pldm) {
4017 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4018 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4019 } else {
4020 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4021 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4022 }
4023
4024 if (fw_reset) {
4025 dev_dbg(hdev->dev,
4026 "Firmware performs HARD reset, going to wait %dms\n",
4027 reset_timeout_ms);
4028
4029 goto skip_reset;
4030 }
4031
4032 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4033 !hdev->asic_prop.hard_reset_done_by_fw);
4034
4035 /* Set device to handle FLR by H/W as we will put the device CPU to
4036 * halt mode
4037 */
4038 if (driver_performs_reset)
4039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4041
4042 /* If linux is loaded in the device CPU we need to communicate with it
4043 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044 * registers in case of old F/Ws
4045 */
4046 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4047 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4049 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4050
4051 WREG32(irq_handler_offset,
4052 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4053
4054 /* This is a hail-mary attempt to revive the card in the small chance that the
4055 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4056 * In that case, triggering reset through GIC won't help. We need to trigger the
4057 * reset as if Linux wasn't loaded.
4058 *
4059 * We do it only if the reset cause was HB, because that would be the indication
4060 * of such an event.
4061 *
4062 * In case watchdog hasn't expired but we still got HB, then this won't do any
4063 * damage.
4064 */
4065 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4066 if (hdev->asic_prop.hard_reset_done_by_fw)
4067 hl_fw_ask_hard_reset_without_linux(hdev);
4068 else
4069 hl_fw_ask_halt_machine_without_linux(hdev);
4070 }
4071 } else {
4072 if (hdev->asic_prop.hard_reset_done_by_fw)
4073 hl_fw_ask_hard_reset_without_linux(hdev);
4074 else
4075 hl_fw_ask_halt_machine_without_linux(hdev);
4076 }
4077
4078 if (driver_performs_reset) {
4079
4080 /* Configure the reset registers. Must be done as early as
4081 * possible in case we fail during H/W initialization
4082 */
4083 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4084 (CFG_RST_H_DMA_MASK |
4085 CFG_RST_H_MME_MASK |
4086 CFG_RST_H_SM_MASK |
4087 CFG_RST_H_TPC_7_MASK));
4088
4089 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4090
4091 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4092 (CFG_RST_H_HBM_MASK |
4093 CFG_RST_H_TPC_7_MASK |
4094 CFG_RST_H_NIC_MASK |
4095 CFG_RST_H_SM_MASK |
4096 CFG_RST_H_DMA_MASK |
4097 CFG_RST_H_MME_MASK |
4098 CFG_RST_H_CPU_MASK |
4099 CFG_RST_H_MMU_MASK));
4100
4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4102 (CFG_RST_L_IF_MASK |
4103 CFG_RST_L_PSOC_MASK |
4104 CFG_RST_L_TPC_MASK));
4105
4106 msleep(msecs: cpu_timeout_ms);
4107
4108 /* Tell ASIC not to re-initialize PCIe */
4109 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4110
4111 /* Restart BTL/BLR upon hard-reset */
4112 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4113
4114 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4115 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4116
4117 dev_dbg(hdev->dev,
4118 "Issued HARD reset command, going to wait %dms\n",
4119 reset_timeout_ms);
4120 } else {
4121 dev_dbg(hdev->dev,
4122 "Firmware performs HARD reset, going to wait %dms\n",
4123 reset_timeout_ms);
4124 }
4125
4126skip_reset:
4127 /*
4128 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4129 * itself is in reset. Need to wait until the reset is deasserted
4130 */
4131 msleep(msecs: reset_timeout_ms);
4132
4133 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4134 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4135 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4136 return -ETIMEDOUT;
4137 }
4138
4139 if (gaudi) {
4140 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4141 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4142 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4143 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4144 HW_CAP_HBM_SCRAMBLER);
4145
4146 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4147
4148 hdev->device_cpu_is_halted = false;
4149 }
4150 return 0;
4151}
4152
4153static int gaudi_suspend(struct hl_device *hdev)
4154{
4155 return hl_fw_send_pci_access_msg(hdev, opcode: CPUCP_PACKET_DISABLE_PCI_ACCESS, value: 0x0);
4156}
4157
/*
 * gaudi_resume() - Re-initialize the PCIe iATU after system resume.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}
4162
4163static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4164 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4165{
4166 int rc;
4167
4168 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4169 VM_DONTCOPY | VM_NORESERVE);
4170
4171#ifdef _HAS_DMA_MMAP_COHERENT
4172 /*
4173 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
4174 * so vm_insert_page() can handle it safely. Without this, the kernel
4175 * may BUG_ON due to VM_PFNMAP.
4176 */
4177 if (is_vmalloc_addr(cpu_addr))
4178 vm_flags_set(vma, VM_MIXEDMAP);
4179
4180 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4181 (dma_addr - HOST_PHYS_BASE), size);
4182 if (rc)
4183 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4184#else
4185
4186 rc = remap_pfn_range(vma, addr: vma->vm_start,
4187 virt_to_phys(address: cpu_addr) >> PAGE_SHIFT,
4188 size, pgprot: vma->vm_page_prot);
4189 if (rc)
4190 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
4191
4192 #endif
4193
4194
4195 return rc;
4196}
4197
/*
 * gaudi_ring_doorbell() - Write a new producer index to a H/W queue doorbell.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: the queue whose doorbell should be rung.
 * @pi: new producer index value.
 *
 * The switch below maps each queue ID to its PQ_PI register. Queues whose
 * engine was never initialized (CPU queue, NICs) are flagged invalid. For
 * the CPU queue, a GIC interrupt is also raised so the device CPU notices
 * the PI update.
 */
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool invalid_queue = false;
	int dma_id;

	switch (hw_queue_id) {
	/* PCI DMA queues (channels 0-1) */
	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	/* HBM DMA queues (channels 2-7); queue IDs past the CPU PQ are
	 * shifted by one, hence the (hw_queue_id - 1)
	 */
	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	/* CPU queue is valid only after the CPU queue handshake succeeded */
	case GAUDI_QUEUE_ID_CPU_PQ:
		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
			db_reg_offset = mmCPU_IF_PF_PQ_PI;
		else
			invalid_queue = true;
		break;

	case GAUDI_QUEUE_ID_MME_0_0:
		db_reg_offset = mmMME2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_0_1:
		db_reg_offset = mmMME2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_0_2:
		db_reg_offset = mmMME2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_0_3:
		db_reg_offset = mmMME2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_MME_1_0:
		db_reg_offset = mmMME0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_1_1:
		db_reg_offset = mmMME0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_1_2:
		db_reg_offset = mmMME0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_1_3:
		db_reg_offset = mmMME0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_0_0:
		db_reg_offset = mmTPC0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_0_1:
		db_reg_offset = mmTPC0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_0_2:
		db_reg_offset = mmTPC0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_0_3:
		db_reg_offset = mmTPC0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_1_0:
		db_reg_offset = mmTPC1_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_1_1:
		db_reg_offset = mmTPC1_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_1_2:
		db_reg_offset = mmTPC1_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_1_3:
		db_reg_offset = mmTPC1_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_2_0:
		db_reg_offset = mmTPC2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_2_1:
		db_reg_offset = mmTPC2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_2_2:
		db_reg_offset = mmTPC2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_2_3:
		db_reg_offset = mmTPC2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_3_0:
		db_reg_offset = mmTPC3_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_3_1:
		db_reg_offset = mmTPC3_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_3_2:
		db_reg_offset = mmTPC3_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_3_3:
		db_reg_offset = mmTPC3_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_4_0:
		db_reg_offset = mmTPC4_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_4_1:
		db_reg_offset = mmTPC4_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_4_2:
		db_reg_offset = mmTPC4_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_4_3:
		db_reg_offset = mmTPC4_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_5_0:
		db_reg_offset = mmTPC5_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_5_1:
		db_reg_offset = mmTPC5_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_5_2:
		db_reg_offset = mmTPC5_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_5_3:
		db_reg_offset = mmTPC5_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_6_0:
		db_reg_offset = mmTPC6_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_6_1:
		db_reg_offset = mmTPC6_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_6_2:
		db_reg_offset = mmTPC6_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_6_3:
		db_reg_offset = mmTPC6_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_7_0:
		db_reg_offset = mmTPC7_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_7_1:
		db_reg_offset = mmTPC7_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_7_2:
		db_reg_offset = mmTPC7_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_7_3:
		db_reg_offset = mmTPC7_QM_PQ_PI_3;
		break;

	/* NIC queues are valid only when the respective NIC port is up */
	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
		break;

	default:
		invalid_queue = true;
	}

	if (invalid_queue) {
		/* Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();

		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
	}
}
4539
4540static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4541 struct hl_bd *bd)
4542{
4543 __le64 *pbd = (__le64 *) bd;
4544
4545 /* The QMANs are on the host memory so a simple copy suffice */
4546 pqe[0] = pbd[0];
4547 pqe[1] = pbd[1];
4548}
4549
4550static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4551 dma_addr_t *dma_handle, gfp_t flags)
4552{
4553 void *kernel_addr = dma_alloc_coherent(dev: &hdev->pdev->dev, size,
4554 dma_handle, gfp: flags);
4555
4556 /* Shift to the device's base physical address of host memory */
4557 if (kernel_addr)
4558 *dma_handle += HOST_PHYS_BASE;
4559
4560 return kernel_addr;
4561}
4562
4563static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4564 void *cpu_addr, dma_addr_t dma_handle)
4565{
4566 /* Cancel the device's base physical address of host memory */
4567 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4568
4569 dma_free_coherent(dev: &hdev->pdev->dev, size, cpu_addr, dma_handle: fixed_dma_handle);
4570}
4571
/*
 * gaudi_scrub_device_dram() - Fill the whole user DRAM (HBM) with @val.
 * @hdev: habanalabs device structure.
 * @val: 64-bit pattern to write.
 *
 * All DMA channels are used in parallel, each memsetting up to 2GB per
 * iteration, then all channels are polled for completion before the next
 * round.
 *
 * Return: 0 on success, -EIO if a DMA channel did not finish in time.
 */
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		/* Kick one memset job per DMA channel */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			/* In MEM_SET mode the "source" registers hold the
			 * fill pattern
			 */
			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
				lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
				upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
				lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
				upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
				chunk_size);
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
				((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
				(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		/* Wait for all channels to become idle before re-using them */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}
4632
/*
 * gaudi_scrub_device_mem() - scrub SRAM and HBM with the configured value.
 * @hdev: habanalabs device structure.
 *
 * No-op unless memory scrubbing is enabled. Waits (bounded by
 * HBM_SCRUBBING_TIMEOUT_US) for the device to become idle so scrubbing
 * cannot race running work, then memsets the SRAM user region and finally
 * scrubs the whole HBM via all DMA channels in parallel.
 *
 * Return: 0 on success, -ETIMEDOUT if the device never went idle, or an
 * error code propagated from the memset/scrub helpers.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
	u64 addr, size, val = hdev->memory_scrub_val;
	ktime_t timeout;
	int rc = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Scrubbing must only run on an idle device */
	timeout = ktime_add_us(kt: ktime_get(), usec: wait_to_idle_time);
	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		if (ktime_compare(cmp1: ktime_get(), cmp2: timeout) > 0) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -ETIMEDOUT;
		}
		usleep_range(min: (1000 >> 2) + 1, max: 1000);
	}

	/* Scrub SRAM; only a 64KB window when hdev->pldm is set */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
		return rc;
	}

	/* Scrub HBM using all DMA channels in parallel */
	rc = gaudi_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
		return rc;
	}

	return 0;
}
4674
4675static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4676 u32 queue_id, dma_addr_t *dma_handle,
4677 u16 *queue_len)
4678{
4679 struct gaudi_device *gaudi = hdev->asic_specific;
4680 struct gaudi_internal_qman_info *q;
4681
4682 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4683 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4684 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4685 return NULL;
4686 }
4687
4688 q = &gaudi->internal_qmans[queue_id];
4689 *dma_handle = q->pq_dma_addr;
4690 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4691
4692 return q->pq_kernel_addr;
4693}
4694
4695static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4696 u16 len, u32 timeout, u64 *result)
4697{
4698 struct gaudi_device *gaudi = hdev->asic_specific;
4699
4700 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4701 if (result)
4702 *result = 0;
4703 return 0;
4704 }
4705
4706 if (!timeout)
4707 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4708
4709 return hl_fw_send_cpu_message(hdev, hw_queue_id: GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4710 timeout, result);
4711}
4712
/*
 * gaudi_test_queue() - sanity-test one external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: absolute id of the queue to test.
 *
 * Sends a MSG_PROT fence packet through the queue and polls a scratch word
 * in host memory until the engine writes the expected fence value.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO if the fence
 * value was not observed within the timeout.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	/* Palladium is much slower than real silicon */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	/* Scratch word in host memory the fence packet will write into */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT with engine-barrier + message-barrier set */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					cb_size: sizeof(struct packet_msg_prot),
					cb_ptr: pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	/* The packet was consumed regardless of the outcome */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
4785
4786static int gaudi_test_cpu_queue(struct hl_device *hdev)
4787{
4788 struct gaudi_device *gaudi = hdev->asic_specific;
4789
4790 /*
4791 * check capability here as send_cpu_message() won't update the result
4792 * value if no capability
4793 */
4794 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4795 return 0;
4796
4797 return hl_fw_test_cpu_queue(hdev);
4798}
4799
4800static int gaudi_test_queues(struct hl_device *hdev)
4801{
4802 int i, rc, ret_val = 0;
4803
4804 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4805 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4806 rc = gaudi_test_queue(hdev, hw_queue_id: i);
4807 if (rc)
4808 ret_val = -EINVAL;
4809 }
4810 }
4811
4812 rc = gaudi_test_cpu_queue(hdev);
4813 if (rc)
4814 ret_val = -EINVAL;
4815
4816 return ret_val;
4817}
4818
4819static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4820 gfp_t mem_flags, dma_addr_t *dma_handle)
4821{
4822 void *kernel_addr;
4823
4824 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4825 return NULL;
4826
4827 kernel_addr = dma_pool_zalloc(pool: hdev->dma_pool, mem_flags, handle: dma_handle);
4828
4829 /* Shift to the device's base physical address of host memory */
4830 if (kernel_addr)
4831 *dma_handle += HOST_PHYS_BASE;
4832
4833 return kernel_addr;
4834}
4835
4836static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4837 dma_addr_t dma_addr)
4838{
4839 /* Cancel the device's base physical address of host memory */
4840 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4841
4842 dma_pool_free(pool: hdev->dma_pool, vaddr, addr: fixed_dma_addr);
4843}
4844
/* Thin wrapper: allocate from the CPU-accessible DMA pool via common F/W code */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
						size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
4850
/* Thin wrapper: free to the CPU-accessible DMA pool via common F/W code */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4856
/*
 * gaudi_get_dma_desc_list_size() - bytes of LIN_DMA packets needed for @sgt.
 * @hdev: habanalabs device structure.
 * @sgt: DMA-mapped scatter-gather table of the pinned host memory.
 *
 * Counts the LIN_DMA descriptors required to cover the table, merging
 * adjacent entries whose DMA addresses are contiguous as long as the merged
 * length stays within DMA_MAX_TRANSFER_SIZE.
 *
 * Return: descriptor count times sizeof(struct packet_lin_dma).
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry terminates the mapped region */
		if (len == 0)
			break;

		/* Fold in following entries while they stay contiguous */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
4896
/*
 * gaudi_pin_memory_before_cs() - pin and DMA-map host memory of a DMA packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is grown by the descriptors the
 *          mapping will need.
 * @user_dma_pkt: user LIN_DMA packet whose host buffer must be pinned.
 * @addr: host virtual address referenced by the packet.
 * @dir: DMA direction of the transfer.
 *
 * If the range is already pinned for this job, only the patched CB size is
 * updated. Otherwise the memory is pinned, added to the job's userptr list
 * and DMA-mapped.
 *
 * Return: 0 on success, negative errno on pin/map failure.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			userptr_list: parser->job_userptr_list, userptr: &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	/* Track the pinning on the job so it is released on job teardown */
	list_add_tail(new: &userptr->job_node, head: parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	/* Account for the LIN_DMA descriptors the patched CB will carry */
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, sgt: userptr->sgt);

	return 0;

unpin_memory:
	list_del(entry: &userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(objp: userptr);
	return rc;
}
4942
4943static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4944 struct hl_cs_parser *parser,
4945 struct packet_lin_dma *user_dma_pkt,
4946 bool src_in_host)
4947{
4948 enum dma_data_direction dir;
4949 bool skip_host_mem_pin = false, user_memset;
4950 u64 addr;
4951 int rc = 0;
4952
4953 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4954 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4955 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4956
4957 if (src_in_host) {
4958 if (user_memset)
4959 skip_host_mem_pin = true;
4960
4961 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4962 dir = DMA_TO_DEVICE;
4963 addr = le64_to_cpu(user_dma_pkt->src_addr);
4964 } else {
4965 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4966 dir = DMA_FROM_DEVICE;
4967 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970 }
4971
4972 if (skip_host_mem_pin)
4973 parser->patched_cb_size += sizeof(*user_dma_pkt);
4974 else
4975 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4976 addr, dir);
4977
4978 return rc;
4979}
4980
4981static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4982 struct hl_cs_parser *parser,
4983 struct packet_lin_dma *user_dma_pkt)
4984{
4985 bool src_in_host = false;
4986 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4987 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4988 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4989
4990 dev_dbg(hdev->dev, "DMA packet details:\n");
4991 dev_dbg(hdev->dev, "source == 0x%llx\n",
4992 le64_to_cpu(user_dma_pkt->src_addr));
4993 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4994 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4995
4996 /*
4997 * Special handling for DMA with size 0. Bypass all validations
4998 * because no transactions will be done except for WR_COMP, which
4999 * is not a security issue
5000 */
5001 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5002 parser->patched_cb_size += sizeof(*user_dma_pkt);
5003 return 0;
5004 }
5005
5006 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5007 src_in_host = true;
5008
5009 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5010 src_in_host);
5011}
5012
5013static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5014 struct hl_cs_parser *parser,
5015 struct packet_load_and_exe *user_pkt)
5016{
5017 u32 cfg;
5018
5019 cfg = le32_to_cpu(user_pkt->cfg);
5020
5021 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5022 dev_err(hdev->dev,
5023 "User not allowed to use Load and Execute\n");
5024 return -EPERM;
5025 }
5026
5027 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5028
5029 return 0;
5030}
5031
/*
 * gaudi_validate_cb() - security-validate a user CB and size the patched CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is recomputed here.
 * @is_mmu: true when the MMU is enabled; LIN_DMA packets are then passed
 *          through unchanged instead of being rebuilt from pinned memory.
 *
 * Walks the user CB packet by packet, rejecting privileged packets
 * (MSG_PROT, CP_DMA, STOP, WREG_BULK) and malformed streams, while
 * accumulating the byte size the patched CB will need.
 *
 * Return: 0 on success, -EPERM for a forbidden packet, -EINVAL for an
 * invalid or out-of-bounds packet.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
				PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(id: pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* A truncated final packet must not be accepted */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				user_pkt: (struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					user_dma_pkt: (struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			/* Benign packets are copied verbatim to the patched CB */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
			user_cb_size: parser->patched_cb_size);

	return rc;
}
5138
/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into mapped ones.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state (queue id, userptr list).
 * @user_dma_pkt: the original user packet.
 * @new_dma_pkt: destination in the patched CB for the generated packets.
 * @new_dma_pkt_size: filled with the total byte size written.
 *
 * Replaces the host virtual address in the user packet with one LIN_DMA
 * packet per (merged) DMA-mapped scatter-gather entry. A host-to-device
 * memset needs no mapping and is copied verbatim.
 *
 * Return: 0 on success, -EFAULT if the memory was never pinned or the
 * mapping yielded no entries.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	/* Queue ids up to DMA_0_3 read their source from host memory */
	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	/* The validation stage must already have pinned this range */
	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					userptr_list: parser->job_userptr_list, userptr: &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* Host-sourced memset touches no host memory - copy as-is */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	/* Remember the user's WR_COMP setting; restored on the last packet */
	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Merge physically contiguous entries into one descriptor */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Engine barrier only on the first descriptor; WR_COMP on none
		 * for now - re-enabled on the last packet below.
		 */
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		/* A memset keeps writing the same device address */
		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
5257
/*
 * gaudi_patch_cb() - copy a validated user CB into the patched CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state with user_cb and pre-allocated patched_cb.
 *
 * Re-walks the user CB: LIN_DMA packets are rewritten via
 * gaudi_patch_dma_packet(), benign packets are copied verbatim, and
 * privileged packets are rejected (defense in depth - validation should
 * already have caught them).
 *
 * Return: 0 on success, -EPERM/-EINVAL/-EFAULT on a bad packet.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
				PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(id: pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand into several packets; advance by the
			 * size actually generated.
			 */
			rc = gaudi_patch_dma_packet(hdev, parser,
					user_dma_pkt: (struct packet_lin_dma *) user_pkt,
					new_dma_pkt: (struct packet_lin_dma *) kernel_pkt,
					new_dma_pkt_size: &new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
5347
/*
 * gaudi_parse_cb_mmu() - parse a user CB when the device MMU is enabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * With the MMU on, the user CB can be executed as-is: it is copied into a
 * kernel-owned patched CB (plus room for the trailing completion packets)
 * and validated in place.
 *
 * Return: 0 on success, negative errno on allocation or validation failure.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				gaudi_get_patched_cb_extra_size(user_cb_size: parser->user_cb_size);
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, mmg: &hdev->kernel_mem_mgr, ctx: hdev->kernel_ctx,
				cb_size: parser->patched_cb_size, internal_cb: false, map_cb: false,
				handle: &handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(mmg: &hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * We are protected from overflow because the check
	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
	 *
	 * There is no option to reach here without going through that check because:
	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
	 *    an external queue.
	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, is_mmu: true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(cb: parser->patched_cb);
		goto out;
	}

	/* Validation must not change the size computed above */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(cb: parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: handle);

	return rc;
}
5432
/*
 * gaudi_parse_cb_no_mmu() - parse a user CB when the device MMU is disabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * First validates the user CB (which also computes the patched CB size and
 * pins the host memory referenced by DMA packets), then allocates the
 * patched CB and rewrites packets into it.
 *
 * Return: 0 on success, negative errno otherwise; on failure all pinned
 * userptrs of this job are released.
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, is_mmu: false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, mmg: &hdev->kernel_mem_mgr, ctx: hdev->kernel_ctx,
				cb_size: parser->patched_cb_size, internal_cb: false, map_cb: false,
				handle: &handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(mmg: &hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(cb: parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, userptr_list: parser->job_userptr_list);
	return rc;
}
5480
/*
 * gaudi_parse_cb_no_ext_queue() - validate a CB submitted to an internal queue.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * For internal queues no patching is done; only the target NIC engine's
 * availability and the CB address range (SRAM, DRAM or the MMU-mapped host
 * range) are checked.
 *
 * Return: 0 if the CB address is valid, -EINVAL for a disabled NIC queue,
 * -EFAULT for an address outside all allowed ranges.
 */
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_queue_offset, nic_mask_q_id;

	/* NIC queues may target engines that were never brought up */
	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));

		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
			return -EINVAL;
		}
	}

	/* For internal queue jobs just check if CB address is valid */
	if (hl_mem_area_inside_range(address: (u64) (uintptr_t) parser->user_cb,
					size: parser->user_cb_size,
					range_start_address: asic_prop->sram_user_base_address,
					range_end_address: asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range(address: (u64) (uintptr_t) parser->user_cb,
					size: parser->user_cb_size,
					range_start_address: asic_prop->dram_user_base_address,
					range_end_address: asic_prop->dram_end_address))
		return 0;

	/* PMMU and HPMMU addresses are equal, check only one of them */
	if (hl_mem_area_inside_range(address: (u64) (uintptr_t) parser->user_cb,
					size: parser->user_cb_size,
					range_start_address: asic_prop->pmmu.start_addr,
					range_end_address: asic_prop->pmmu.end_addr))
		return 0;

	dev_err(hdev->dev,
		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}
5525
5526static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5527{
5528 struct gaudi_device *gaudi = hdev->asic_specific;
5529
5530 if (parser->queue_type == QUEUE_TYPE_INT)
5531 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5532
5533 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5534 return gaudi_parse_cb_mmu(hdev, parser);
5535 else
5536 return gaudi_parse_cb_no_mmu(hdev, parser);
5537}
5538
/*
 * gaudi_add_end_of_cb_packets() - append trailing packets to a patched CB.
 * @hdev: habanalabs device structure.
 * @kernel_address: kernel mapping of the CB.
 * @len: total CB length; the two MSG_PROT packets sit at its very end.
 * @original_len: length of the user payload; NOP padding starts here.
 * @cq_addr: completion-queue address the first MSG_PROT writes to.
 * @cq_val: value written to @cq_addr.
 * @msi_vec: MSI vector the second MSG_PROT triggers.
 * @eb: set the engine-barrier bit on the completion packet.
 */
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
				u32 msi_vec, bool eb)
{
	struct packet_msg_prot *cq_pkt;
	struct packet_nop *cq_padding;
	u64 msi_addr;
	u32 tmp;

	cq_padding = kernel_address + original_len;
	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	/* Fill the gap between user payload and the MSG_PROT pair with NOPs */
	while ((void *)cq_padding < (void *)cq_pkt) {
		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
		cq_padding++;
	}

	/* First MSG_PROT: write the completion value to the CQ */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	/* Second MSG_PROT: trigger the MSI interrupt */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);
	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
5575
/* Publish the driver's event-queue read offset to the CPU interface */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5580
/*
 * gaudi_memset_device_memory() - memset a device memory range via DMA0.
 * @hdev: habanalabs device structure.
 * @addr: device address to fill.
 * @size: number of bytes to fill.
 * @val: fill pattern (carried in the LIN_DMA source-address field).
 *
 * Builds a kernel CB holding a single memset LIN_DMA packet and executes it
 * as a job on QMAN0 (DMA queue 0). DMA0 error-cause state is checked before
 * and after the job.
 *
 * Return: 0 on success, -EFAULT/-ENOMEM on setup failure, -EIO on a DMA
 * error, or the error from job submission.
 */
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, internal_cb: false);
	if (!cb)
		return -EFAULT;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	/* OR is equivalent to assignment here - the packet was zeroed above */
	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, queue_type: QUEUE_TYPE_EXT, is_kernel_allocated_cb: true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(v: &job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	/* Leave room for the MSG_PROT completion packet appended by QMAN0 */
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(objp: job);
	atomic_dec(v: &cb->cs_cnt);

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: cb->buf->handle);

	return rc;
}
5659
5660static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5661 u32 num_regs, u32 val)
5662{
5663 struct packet_msg_long *pkt;
5664 struct hl_cs_job *job;
5665 u32 cb_size, ctl;
5666 struct hl_cb *cb;
5667 int i, rc;
5668
5669 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5670
5671 if (cb_size > SZ_2M) {
5672 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5673 return -ENOMEM;
5674 }
5675
5676 cb = hl_cb_kernel_create(hdev, cb_size, internal_cb: false);
5677 if (!cb)
5678 return -EFAULT;
5679
5680 pkt = cb->kernel_address;
5681
5682 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5683 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5684 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5685 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5686 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5687
5688 for (i = 0; i < num_regs ; i++, pkt++) {
5689 pkt->ctl = cpu_to_le32(ctl);
5690 pkt->value = cpu_to_le32(val);
5691 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5692 }
5693
5694 job = hl_cs_allocate_job(hdev, queue_type: QUEUE_TYPE_EXT, is_kernel_allocated_cb: true);
5695 if (!job) {
5696 dev_err(hdev->dev, "Failed to allocate a new job\n");
5697 rc = -ENOMEM;
5698 goto release_cb;
5699 }
5700
5701 job->id = 0;
5702 job->user_cb = cb;
5703 atomic_inc(v: &job->user_cb->cs_cnt);
5704 job->user_cb_size = cb_size;
5705 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5706 job->patched_cb = job->user_cb;
5707 job->job_cb_size = cb_size;
5708
5709 hl_debugfs_add_job(hdev, job);
5710
5711 rc = gaudi_send_job_on_qman0(hdev, job);
5712 hl_debugfs_remove_job(hdev, job);
5713 kfree(objp: job);
5714 atomic_dec(v: &cb->cs_cnt);
5715
5716release_cb:
5717 hl_cb_put(cb);
5718 hl_cb_destroy(mmg: &hdev->kernel_mem_mgr, cb_handle: cb->buf->handle);
5719
5720 return rc;
5721}
5722
5723static int gaudi_restore_sm_registers(struct hl_device *hdev)
5724{
5725 u64 base_addr;
5726 u32 num_regs;
5727 int rc;
5728
5729 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5730 num_regs = NUM_OF_SOB_IN_BLOCK;
5731 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5732 if (rc) {
5733 dev_err(hdev->dev, "failed resetting SM registers");
5734 return -ENOMEM;
5735 }
5736
5737 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5738 num_regs = NUM_OF_SOB_IN_BLOCK;
5739 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5740 if (rc) {
5741 dev_err(hdev->dev, "failed resetting SM registers");
5742 return -ENOMEM;
5743 }
5744
5745 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5746 num_regs = NUM_OF_SOB_IN_BLOCK;
5747 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5748 if (rc) {
5749 dev_err(hdev->dev, "failed resetting SM registers");
5750 return -ENOMEM;
5751 }
5752
5753 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5754 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5755 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5756 if (rc) {
5757 dev_err(hdev->dev, "failed resetting SM registers");
5758 return -ENOMEM;
5759 }
5760
5761 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5762 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5763 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5764 if (rc) {
5765 dev_err(hdev->dev, "failed resetting SM registers");
5766 return -ENOMEM;
5767 }
5768
5769 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5770 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5771 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5772 if (rc) {
5773 dev_err(hdev->dev, "failed resetting SM registers");
5774 return -ENOMEM;
5775 }
5776
5777 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5778 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5779 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5780 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5781 if (rc) {
5782 dev_err(hdev->dev, "failed resetting SM registers");
5783 return -ENOMEM;
5784 }
5785
5786 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5787 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5788 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5789 rc = gaudi_memset_registers(hdev, reg_base: base_addr, num_regs, val: 0);
5790 if (rc) {
5791 dev_err(hdev->dev, "failed resetting SM registers");
5792 return -ENOMEM;
5793 }
5794
5795 return 0;
5796}
5797
5798static void gaudi_restore_dma_registers(struct hl_device *hdev)
5799{
5800 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5801 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5802 int i;
5803
5804 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5805 u64 sob_addr = CFG_BASE +
5806 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5807 (i * sob_delta);
5808 u32 dma_offset = i * DMA_CORE_OFFSET;
5809
5810 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5811 lower_32_bits(sob_addr));
5812 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5813 upper_32_bits(sob_addr));
5814 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5815
5816 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5817 * modified by the user for SRAM reduction
5818 */
5819 if (i > 1)
5820 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5821 0x00000001);
5822 }
5823}
5824
5825static void gaudi_restore_qm_registers(struct hl_device *hdev)
5826{
5827 u32 qman_offset;
5828 int i;
5829
5830 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5831 qman_offset = i * DMA_QMAN_OFFSET;
5832 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5833 }
5834
5835 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5836 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5837 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5838 }
5839
5840 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5841 qman_offset = i * TPC_QMAN_OFFSET;
5842 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5843 }
5844
5845 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5846 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5847 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5848 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5849 }
5850}
5851
/*
 * gaudi_restore_user_registers() - restore all user-modifiable registers
 * @hdev: habanalabs device structure.
 *
 * Restores SM, DMA-core and QMAN registers to their default state.
 *
 * Return: 0 on success, negative errno if clearing the SM registers failed.
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5865
/* Context-switch hook - nothing to do here for Gaudi; always succeeds. */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5870
5871static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5872{
5873 u32 size = hdev->asic_prop.mmu_pgt_size +
5874 hdev->asic_prop.mmu_cache_mng_size;
5875 struct gaudi_device *gaudi = hdev->asic_specific;
5876 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5877
5878 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5879 return 0;
5880
5881 return gaudi_memset_device_memory(hdev, addr, size, val: 0);
5882}
5883
/* Intentionally a no-op - Gaudi keeps no phase-topology state to restore. */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5888
/*
 * gaudi_dma_core_transfer() - perform one linear DMA-core transfer and wait
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA core channel to use.
 * @addr: device source address.
 * @size_to_dma: number of bytes to transfer.
 * @dma_addr: destination host DMA address.
 *
 * Programs the source/destination/size registers, commits a linear DMA and
 * polls STS0 until the engine is no longer busy. Any reported error cause
 * is logged and cleared.
 *
 * Return: 0 on success, -EIO on poll timeout or non-zero error cause.
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	/* Program src/dst/size, then kick a linear (non-descriptor) DMA */
	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	/* Wait for the engine to go idle (BUSY bit cleared) */
	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		/* error cause bits are write-to-clear - TODO confirm */
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
5935
5936static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5937 void *blob_addr)
5938{
5939 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5940 u32 qm_glbl_sts0, qm_cgm_sts;
5941 u64 dma_offset, qm_offset;
5942 dma_addr_t dma_addr;
5943 void *kernel_addr;
5944 bool is_eng_idle;
5945 int rc = 0, dma_id;
5946
5947 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5948
5949 if (!kernel_addr)
5950 return -ENOMEM;
5951
5952 hdev->asic_funcs->hw_queues_lock(hdev);
5953
5954 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5955 dma_offset = dma_id * DMA_CORE_OFFSET;
5956 qm_offset = dma_id * DMA_QMAN_OFFSET;
5957 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5958 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5959 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5960 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5961 IS_DMA_IDLE(dma_core_sts0);
5962
5963 if (!is_eng_idle) {
5964 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5965 dma_offset = dma_id * DMA_CORE_OFFSET;
5966 qm_offset = dma_id * DMA_QMAN_OFFSET;
5967 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5968 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5969 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5970 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5971 IS_DMA_IDLE(dma_core_sts0);
5972
5973 if (!is_eng_idle) {
5974 dev_err_ratelimited(hdev->dev,
5975 "Can't read via DMA because it is BUSY\n");
5976 rc = -EAGAIN;
5977 goto out;
5978 }
5979 }
5980
5981 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5982 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5983 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5984
5985 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5986 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5987 * ASID
5988 */
5989 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5990
5991 /* Verify DMA is OK */
5992 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5993 if (err_cause) {
5994 dev_dbg(hdev->dev,
5995 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5996 err_cause);
5997 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5998 }
5999
6000 pos = 0;
6001 size_left = size;
6002 size_to_dma = SZ_2M;
6003
6004 while (size_left > 0) {
6005
6006 if (size_left < SZ_2M)
6007 size_to_dma = size_left;
6008
6009 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6010 dma_addr);
6011 if (rc)
6012 break;
6013
6014 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6015
6016 if (size_left <= SZ_2M)
6017 break;
6018
6019 pos += SZ_2M;
6020 addr += SZ_2M;
6021 size_left -= SZ_2M;
6022 }
6023
6024 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6025 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6026 * ASID
6027 */
6028 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6029 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6030
6031 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6032
6033out:
6034 hdev->asic_funcs->hw_queues_unlock(hdev);
6035
6036 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6037
6038 return rc;
6039}
6040
6041static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6042{
6043 struct gaudi_device *gaudi = hdev->asic_specific;
6044
6045 if (hdev->reset_info.hard_reset_pending)
6046 return U64_MAX;
6047
6048 return readq(addr: hdev->pcie_bar[HBM_BAR_ID] +
6049 (addr - gaudi->hbm_bar_cur_addr));
6050}
6051
6052static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6053{
6054 struct gaudi_device *gaudi = hdev->asic_specific;
6055
6056 if (hdev->reset_info.hard_reset_pending)
6057 return;
6058
6059 writeq(val, addr: hdev->pcie_bar[HBM_BAR_ID] +
6060 (addr - gaudi->hbm_bar_cur_addr));
6061}
6062
/*
 * gaudi_mmu_prepare_reg() - program an engine register with a new ASID
 * @hdev: habanalabs device structure.
 * @reg: address of the register holding the MMBP/ASID fields.
 * @asid: address-space ID to program into the low bits.
 *
 * Clears the low 11 bits of the register (MMU-bypass + ASID fields) and
 * then ORs in the given ASID.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6069
/*
 * gaudi_mmu_prepare() - program the given ASID into every engine
 * @hdev: habanalabs device structure.
 * @asid: address-space ID; must fit the HW ASID field or the call is
 *        rejected with a critical error message.
 *
 * Walks all DMA QMANs and cores, TPCs, MMEs, initialized NIC QMANs and the
 * PSOC trace initiators, writing the ASID into each engine's non-secure
 * properties / AXI-user registers via gaudi_mmu_prepare_reg(). No-op if
 * the MMU was never initialized.
 */
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* DMA QMANs: 5 non-secure properties registers per channel */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* DMA cores */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	/* TPCs: QMAN properties plus the engine's AXI read/write user regs */
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	/* MME: only the two master QMANs (MME0/MME2) have queue managers */
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* All four MME engines: SBAB read users and WBC write-back */
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	/* NIC QMANs: only the ports that were successfully initialized */
	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	/* PSOC trace initiators */
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}
6360
6361static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6362 struct hl_cs_job *job)
6363{
6364 struct packet_msg_prot *fence_pkt;
6365 u32 *fence_ptr;
6366 dma_addr_t fence_dma_addr;
6367 struct hl_cb *cb;
6368 u32 tmp, timeout, dma_offset;
6369 int rc;
6370
6371 if (hdev->pldm)
6372 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6373 else
6374 timeout = HL_DEVICE_TIMEOUT_USEC;
6375
6376 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6377 if (!fence_ptr) {
6378 dev_err(hdev->dev,
6379 "Failed to allocate fence memory for QMAN0\n");
6380 return -ENOMEM;
6381 }
6382
6383 cb = job->patched_cb;
6384
6385 fence_pkt = cb->kernel_address +
6386 job->job_cb_size - sizeof(struct packet_msg_prot);
6387
6388 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6389 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6390 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6391
6392 fence_pkt->ctl = cpu_to_le32(tmp);
6393 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6394 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6395
6396 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6397
6398 WREG32(mmDMA0_CORE_PROT + dma_offset,
6399 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6400
6401 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id: GAUDI_QUEUE_ID_DMA_0_0,
6402 cb_size: job->job_cb_size, cb_ptr: cb->bus_address);
6403 if (rc) {
6404 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6405 goto free_fence_ptr;
6406 }
6407
6408 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6409 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6410 timeout, true);
6411
6412 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id: GAUDI_QUEUE_ID_DMA_0_0);
6413
6414 if (rc == -ETIMEDOUT) {
6415 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6416 goto free_fence_ptr;
6417 }
6418
6419free_fence_ptr:
6420 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6421
6422 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6423 return rc;
6424}
6425
6426static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6427{
6428 if (event_type >= GAUDI_EVENT_SIZE)
6429 goto event_not_supported;
6430
6431 if (!gaudi_irq_map_table[event_type].valid)
6432 goto event_not_supported;
6433
6434 snprintf(buf: desc, size, fmt: gaudi_irq_map_table[event_type].name);
6435
6436 return;
6437
6438event_not_supported:
6439 snprintf(buf: desc, size, fmt: "N/A");
6440}
6441
/*
 * gaudi_get_razwi_initiator_dma_name() - resolve a RAZWI DMA-IF initiator
 * @hdev: habanalabs device structure.
 * @x_y: initiator X/Y coordinate as read from the RAZWI ID register.
 * @is_write: true for a write RAZWI, false for a read RAZWI.
 * @engine_id_1: out - first candidate engine ID.
 * @engine_id_2: out - second candidate engine ID (set only when the exact
 *               channel cannot be determined).
 *
 * Each DMA-IF coordinate is shared by two DMA channels. The exact culprit
 * is disambiguated by checking which channel reports the matching HBW
 * read/write error bit in its ERR_CAUSE register; if neither or both
 * match, both candidates are reported.
 *
 * Return: a static name string, or "unknown initiator" for an unexpected
 * coordinate.
 */
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
						bool is_write, u16 *engine_id_1,
						u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	/* Error-cause bit to test depends on the access direction */
	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	/* Map the DMA-IF coordinate to its pair of candidate channels */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	/* Read both candidates' error causes */
	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	/* Exactly one channel with the matching error bit identifies the
	 * initiator; otherwise report both candidates
	 */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	}

unknown_initiator:
	return "unknown initiator";
}
6539
/*
 * gaudi_get_razwi_initiator_name - decode the RAZWI initiator captured by HW.
 *
 * Reads the MMU RAZWI ID capture register (write or read flavor, selected by
 * @is_write) and decodes the initiator's router X/Y coordinates plus AXI id
 * into a printable engine name. When the initiator has a driver-visible
 * engine id it is returned through @engine_id_1 (DMA coordinates that map to
 * two possible engines may also fill @engine_id_2, see the DMA helper).
 */
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
			u16 *engine_id_1, u16 *engine_id_2)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	/* Isolate the X/Y coordinate fields and the AXI id field */
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	/*
	 * Some X/Y coordinates are shared by several initiators; the AXI id
	 * disambiguates within such a coordinate.
	 */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id*/
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		/* DMA coordinates need the error-cause registers to decide */
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
6656
6657static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6658 u16 *engine_id_2, bool *is_read, bool *is_write)
6659{
6660
6661 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6662 dev_err_ratelimited(hdev->dev,
6663 "RAZWI event caused by illegal write of %s\n",
6664 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6665 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6666 *is_write = true;
6667 }
6668
6669 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6670 dev_err_ratelimited(hdev->dev,
6671 "RAZWI event caused by illegal read of %s\n",
6672 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6673 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6674 *is_read = true;
6675 }
6676}
6677
6678static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6679{
6680 struct gaudi_device *gaudi = hdev->asic_specific;
6681 u32 val;
6682
6683 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6684 return;
6685
6686 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6687 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6688 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6689 *addr <<= 32;
6690 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6691
6692 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6693 hl_handle_page_fault(hdev, addr: *addr, eng_id: 0, is_pmmu: true, event_mask);
6694
6695 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6696 }
6697
6698 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6699 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6700 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6701 *addr <<= 32;
6702 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6703
6704 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6705
6706 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6707 }
6708}
6709
6710/*
6711 * +-------------------+------------------------------------------------------+
6712 * | Configuration Reg | Description |
6713 * | Address | |
6714 * +-------------------+------------------------------------------------------+
6715 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6716 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6717 * | |0xF34 memory wrappers 63:32 |
6718 * | |0xF38 memory wrappers 95:64 |
6719 * | |0xF3C memory wrappers 127:96 |
6720 * +-------------------+------------------------------------------------------+
6721 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6722 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6723 * | |0xF44 memory wrappers 63:32 |
6724 * | |0xF48 memory wrappers 95:64 |
6725 * | |0xF4C memory wrappers 127:96 |
6726 * +-------------------+------------------------------------------------------+
6727 */
6728static int gaudi_extract_ecc_info(struct hl_device *hdev,
6729 struct ecc_info_extract_params *params, u64 *ecc_address,
6730 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6731{
6732 u32 i, num_mem_regs, reg, err_bit;
6733 u64 err_addr, err_word = 0;
6734
6735 num_mem_regs = params->num_memories / 32 +
6736 ((params->num_memories % 32) ? 1 : 0);
6737
6738 if (params->block_address >= CFG_BASE)
6739 params->block_address -= CFG_BASE;
6740
6741 if (params->derr)
6742 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6743 else
6744 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6745
6746 /* Set invalid wrapper index */
6747 *memory_wrapper_idx = 0xFF;
6748
6749 /* Iterate through memory wrappers, a single bit must be set */
6750 for (i = 0 ; i < num_mem_regs ; i++) {
6751 err_addr += i * 4;
6752 err_word = RREG32(err_addr);
6753 if (err_word) {
6754 err_bit = __ffs(err_word);
6755 *memory_wrapper_idx = err_bit + (32 * i);
6756 break;
6757 }
6758 }
6759
6760 if (*memory_wrapper_idx == 0xFF) {
6761 dev_err(hdev->dev, "ECC error information cannot be found\n");
6762 return -EINVAL;
6763 }
6764
6765 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6766 *memory_wrapper_idx);
6767
6768 *ecc_address =
6769 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6770 *ecc_syndrom =
6771 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6772
6773 /* Clear error indication */
6774 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6775 if (params->derr)
6776 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6777 else
6778 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6779
6780 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6781
6782 return 0;
6783}
6784
6785/*
6786 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6787 *
6788 * @idx: the current pi/ci value
6789 * @q_len: the queue length (power of 2)
6790 *
6791 * @return the cyclically decremented index
6792 */
6793static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6794{
6795 u32 mask = q_len - 1;
6796
6797 /*
6798 * modular decrement is equivalent to adding (queue_size -1)
6799 * later we take LSBs to make sure the value is in the
6800 * range [0, queue_len - 1]
6801 */
6802 return (idx + q_len - 1) & mask;
6803}
6804
6805/**
6806 * gaudi_handle_sw_config_stream_data - print SW config stream data
6807 *
6808 * @hdev: pointer to the habanalabs device structure
6809 * @stream: the QMAN's stream
6810 * @qman_base: base address of QMAN registers block
6811 * @event_mask: mask of the last events occurred
6812 */
6813static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6814 u64 qman_base, u64 event_mask)
6815{
6816 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6817 u32 cq_ptr_lo_off, size;
6818
6819 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6820
6821 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6822 stream * cq_ptr_lo_off;
6823 cq_ptr_hi = cq_ptr_lo +
6824 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6825 cq_tsize = cq_ptr_lo +
6826 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6827
6828 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6829 size = RREG32(cq_tsize);
6830 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6831 stream, cq_ptr, size);
6832
6833 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6834 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6835 hdev->captured_err_info.undef_opcode.cq_size = size;
6836 hdev->captured_err_info.undef_opcode.stream_id = stream;
6837 }
6838}
6839
6840/**
6841 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6842 *
6843 * @hdev: pointer to the habanalabs device structure
6844 * @qid_base: first QID of the QMAN (out of 4 streams)
6845 * @stream: the QMAN's stream
6846 * @qman_base: base address of QMAN registers block
6847 * @event_mask: mask of the last events occurred
6848 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6849 */
6850static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6851 u32 stream, u64 qman_base,
6852 u64 event_mask,
6853 bool pr_sw_conf)
6854{
6855 u32 ci, qm_ci_stream_off, queue_len;
6856 struct hl_hw_queue *q;
6857 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6858 int i;
6859
6860 q = &hdev->kernel_queues[qid_base + stream];
6861
6862 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6863 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6864 stream * qm_ci_stream_off;
6865
6866 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6867 q->int_queue_len : HL_QUEUE_LENGTH;
6868
6869 hdev->asic_funcs->hw_queues_lock(hdev);
6870
6871 if (pr_sw_conf)
6872 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6873
6874 ci = RREG32(pq_ci);
6875
6876 /* we should start printing form ci -1 */
6877 ci = gaudi_queue_idx_dec(idx: ci, q_len: queue_len);
6878 memset(addr, 0, sizeof(addr));
6879
6880 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6881 struct hl_bd *bd;
6882 u32 len;
6883
6884 bd = q->kernel_address;
6885 bd += ci;
6886
6887 len = le32_to_cpu(bd->len);
6888 /* len 0 means uninitialized entry- break */
6889 if (!len)
6890 break;
6891
6892 addr[i] = le64_to_cpu(bd->ptr);
6893
6894 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6895 stream, ci, addr[i], len);
6896
6897 /* get previous ci, wrap if needed */
6898 ci = gaudi_queue_idx_dec(idx: ci, q_len: queue_len);
6899 }
6900
6901 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6902 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6903 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6904
6905 if (arr_idx == 0) {
6906 undef_opcode->timestamp = ktime_get();
6907 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6908 }
6909
6910 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6911 undef_opcode->cb_addr_streams_len++;
6912 }
6913
6914 hdev->asic_funcs->hw_queues_unlock(hdev);
6915}
6916
6917/**
6918 * handle_qman_data_on_err - extract QMAN data on error
6919 *
6920 * @hdev: pointer to the habanalabs device structure
6921 * @qid_base: first QID of the QMAN (out of 4 streams)
6922 * @stream: the QMAN's stream
6923 * @qman_base: base address of QMAN registers block
6924 * @event_mask: mask of the last events occurred
6925 *
6926 * This function attempt to exatract as much data as possible on QMAN error.
6927 * On upper CP print the SW config stream data and last 8 PQEs.
6928 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6929 */
6930static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6931 u32 stream, u64 qman_base, u64 event_mask)
6932{
6933 u32 i;
6934
6935 if (stream != QMAN_STREAMS) {
6936 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6937 qman_base, event_mask, pr_sw_conf: true);
6938 return;
6939 }
6940
6941 /* handle Lower-CP */
6942 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6943
6944 for (i = 0; i < QMAN_STREAMS; i++)
6945 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream: i,
6946 qman_base, event_mask, pr_sw_conf: false);
6947}
6948
6949static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6950 const char *qm_name,
6951 u64 qman_base,
6952 u32 qid_base,
6953 u64 *event_mask)
6954{
6955 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6956 u64 glbl_sts_addr, arb_err_addr;
6957 char reg_desc[32];
6958
6959 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6960 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6961
6962 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6963 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6964 glbl_sts_clr_val = 0;
6965 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6966
6967 if (!glbl_sts_val)
6968 continue;
6969
6970 if (i == QMAN_STREAMS)
6971 snprintf(buf: reg_desc, ARRAY_SIZE(reg_desc), fmt: "LowerCP");
6972 else
6973 snprintf(buf: reg_desc, ARRAY_SIZE(reg_desc), fmt: "stream%u", i);
6974
6975 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6976 if (glbl_sts_val & BIT(j)) {
6977 dev_err_ratelimited(hdev->dev,
6978 "%s %s. err cause: %s\n",
6979 qm_name, reg_desc,
6980 gaudi_qman_error_cause[j]);
6981 glbl_sts_clr_val |= BIT(j);
6982 }
6983 }
6984 /* check for undefined opcode */
6985 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6986 hdev->captured_err_info.undef_opcode.write_enable) {
6987 memset(&hdev->captured_err_info.undef_opcode, 0,
6988 sizeof(hdev->captured_err_info.undef_opcode));
6989
6990 hdev->captured_err_info.undef_opcode.write_enable = false;
6991 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6992 }
6993
6994 /* Write 1 clear errors */
6995 if (!hdev->stop_on_err)
6996 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6997 else
6998 handle_qman_data_on_err(hdev, qid_base, stream: i, qman_base, event_mask: *event_mask);
6999 }
7000
7001 arb_err_val = RREG32(arb_err_addr);
7002
7003 if (!arb_err_val)
7004 return;
7005
7006 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7007 if (arb_err_val & BIT(j)) {
7008 dev_err_ratelimited(hdev->dev,
7009 "%s ARB_ERR. err cause: %s\n",
7010 qm_name,
7011 gaudi_qman_arb_error_cause[j]);
7012 }
7013 }
7014}
7015
7016static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7017 struct hl_eq_sm_sei_data *sei_data)
7018{
7019 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7020
7021 /* Flip the bits as the enum is ordered in the opposite way */
7022 index = (index ^ 0x3) & 0x3;
7023
7024 switch (sei_data->sei_cause) {
7025 case SM_SEI_SO_OVERFLOW:
7026 dev_err_ratelimited(hdev->dev,
7027 "%s SEI Error: SOB Group %u overflow/underflow",
7028 gaudi_sync_manager_names[index],
7029 le32_to_cpu(sei_data->sei_log));
7030 break;
7031 case SM_SEI_LBW_4B_UNALIGNED:
7032 dev_err_ratelimited(hdev->dev,
7033 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7034 gaudi_sync_manager_names[index],
7035 le32_to_cpu(sei_data->sei_log));
7036 break;
7037 case SM_SEI_AXI_RESPONSE_ERR:
7038 dev_err_ratelimited(hdev->dev,
7039 "%s SEI Error: AXI ID %u response error",
7040 gaudi_sync_manager_names[index],
7041 le32_to_cpu(sei_data->sei_log));
7042 break;
7043 default:
7044 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7045 le32_to_cpu(sei_data->sei_log));
7046 break;
7047 }
7048}
7049
/*
 * gaudi_handle_ecc_event - extract and report ECC error details for an event.
 *
 * Depending on the event type - and on whether FW security hides the ECC
 * registers from the driver - the error address, syndrome and memory-wrapper
 * index are taken either from the FW-provided EQ entry or read directly from
 * the failing block's ECC registers via gaudi_extract_ecc_info().
 *
 * NOTE(review): the "name:" argument labels in the gaudi_extract_ecc_info()
 * call below look like source-extraction artifacts, not valid C - confirm
 * against the original file.
 */
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	/* Secured FW: the driver cannot touch the ECC registers itself */
	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		/* Consecutive TPC SERR events map 1:1 to TPC indices */
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		/* MME events for successive engines are spaced 4 ids apart */
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, params: &params, ecc_address: &ecc_address,
				ecc_syndrom: &ecc_syndrom, memory_wrapper_idx: &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
7146
/*
 * gaudi_handle_qman_err - map a QMAN error event to its QMAN and handle it.
 *
 * Translates the event type into the QMAN's register base, first queue id
 * and printable name, then delegates to gaudi_handle_qman_err_generic().
 * Unknown event types are ignored.
 *
 * NOTE(review): the "buf:"/"fmt:"/"qm_name:" argument labels below look like
 * source-extraction artifacts, not valid C - confirm against the original
 * file.
 */
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		/* TPC QM events are consecutive, 4 queue ids per QMAN */
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(buf: desc, ARRAY_SIZE(desc), fmt: "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, qm_name: desc, qman_base, qid_base, event_mask);
}
7237
7238static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7239 bool check_razwi, u64 *event_mask)
7240{
7241 bool is_read = false, is_write = false;
7242 u16 engine_id[2], num_of_razwi_eng = 0;
7243 char desc[64] = "";
7244 u64 razwi_addr = 0;
7245 u8 razwi_flags = 0;
7246
7247 /*
7248 * Init engine id by default as not valid and only if razwi initiated from engine with
7249 * engine id it will get valid value.
7250 */
7251 engine_id[0] = HL_RAZWI_NA_ENG_ID;
7252 engine_id[1] = HL_RAZWI_NA_ENG_ID;
7253
7254 gaudi_get_event_desc(event_type, desc, size: sizeof(desc));
7255 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7256 event_type, desc);
7257
7258 if (check_razwi) {
7259 gaudi_print_and_get_razwi_info(hdev, engine_id_1: &engine_id[0], engine_id_2: &engine_id[1], is_read: &is_read,
7260 is_write: &is_write);
7261 gaudi_print_and_get_mmu_error_info(hdev, addr: &razwi_addr, event_mask);
7262
7263 if (is_read)
7264 razwi_flags |= HL_RAZWI_READ;
7265 if (is_write)
7266 razwi_flags |= HL_RAZWI_WRITE;
7267
7268 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7269 if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7270 num_of_razwi_eng = 2;
7271 else
7272 num_of_razwi_eng = 1;
7273 }
7274
7275 if (razwi_flags)
7276 hl_handle_razwi(hdev, addr: razwi_addr, engine_id, num_of_engines: num_of_razwi_eng,
7277 flags: razwi_flags, event_mask);
7278 }
7279}
7280
7281static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7282 struct cpucp_pkt_sync_err *sync_err)
7283{
7284 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7285
7286 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7287 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7288}
7289
7290static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7291 struct hl_eq_fw_alive *fw_alive)
7292{
7293 dev_err(hdev->dev,
7294 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7295 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7296 le32_to_cpu(fw_alive->process_id),
7297 le32_to_cpu(fw_alive->thread_id),
7298 le64_to_cpu(fw_alive->uptime_seconds));
7299}
7300
7301static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7302 void *data)
7303{
7304 char desc[64] = "", *type;
7305 struct eq_nic_sei_event *eq_nic_sei = data;
7306 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7307
7308 switch (eq_nic_sei->axi_error_cause) {
7309 case RXB:
7310 type = "RXB";
7311 break;
7312 case RXE:
7313 type = "RXE";
7314 break;
7315 case TXS:
7316 type = "TXS";
7317 break;
7318 case TXE:
7319 type = "TXE";
7320 break;
7321 case QPC_RESP:
7322 type = "QPC_RESP";
7323 break;
7324 case NON_AXI_ERR:
7325 type = "NON_AXI_ERR";
7326 break;
7327 case TMR:
7328 type = "TMR";
7329 break;
7330 default:
7331 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7332 eq_nic_sei->axi_error_cause);
7333 type = "N/A";
7334 break;
7335 }
7336
7337 snprintf(buf: desc, size: sizeof(desc), fmt: "NIC%d_%s%d", nic_id, type,
7338 eq_nic_sei->id);
7339 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7340 event_type, desc);
7341}
7342
/*
 * gaudi_compute_reset_late_init - late-init hook for a compute (soft) reset.
 *
 * Gaudi only supports hard reset, so this hook always refuses with -EPERM.
 */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return -EPERM;
}
7348
/*
 * gaudi_hbm_read_interrupts - read and clear HBM interrupt/ECC status.
 *
 * If FW reported ECC data (HBM ECC enabled in the boot status), log the
 * decoded fields from the EQ entry and return. Otherwise - unless FW
 * security blocks register access - read each HBM channel's interrupt and
 * ECC registers directly, log any pending error, clear the interrupts, and
 * finally check the MC SRAM SERR/DERR summary registers.
 *
 * Return: 0 on success or when data comes from FW, -EIO if any HW error
 * state was observed.
 */
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		/* Unpack the per-field ECC info from the FW-provided word */
		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	/*
	 * NOTE(review): the 0x06C/0x07C, 0x060/0x070/0x064/0x074 and
	 * 0x8F3x/0x8F4x offsets below are undocumented HBM memory-controller
	 * registers; the bit decodes mirror the dev_err format strings -
	 * confirm against the MC register spec before modifying.
	 */
	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		/* First pseudo-channel (even index) of this channel */
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Second pseudo-channel (odd index) of this channel */
		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	/* MC SRAM single-error indication registers (see table above) */
	val = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	/* MC SRAM double-error indication registers */
	val = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}
7463
7464static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7465{
7466 switch (hbm_event_type) {
7467 case GAUDI_EVENT_HBM0_SPI_0:
7468 case GAUDI_EVENT_HBM0_SPI_1:
7469 return 0;
7470 case GAUDI_EVENT_HBM1_SPI_0:
7471 case GAUDI_EVENT_HBM1_SPI_1:
7472 return 1;
7473 case GAUDI_EVENT_HBM2_SPI_0:
7474 case GAUDI_EVENT_HBM2_SPI_1:
7475 return 2;
7476 case GAUDI_EVENT_HBM3_SPI_0:
7477 case GAUDI_EVENT_HBM3_SPI_1:
7478 return 3;
7479 default:
7480 break;
7481 }
7482
7483 /* Should never happen */
7484 return 0;
7485}
7486
7487static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7488 char *interrupt_name)
7489{
7490 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7491 bool soft_reset_required = false;
7492
7493 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7494 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7495
7496 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7497 if (tpc_interrupts_cause & BIT(i)) {
7498 dev_err_ratelimited(hdev->dev,
7499 "TPC%d_%s interrupt cause: %s\n",
7500 tpc_id, interrupt_name,
7501 gaudi_tpc_interrupts_cause[i]);
7502 /* If this is QM error, we need to soft-reset */
7503 if (i == 15)
7504 soft_reset_required = true;
7505 }
7506
7507 /* Clear interrupts */
7508 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7509
7510 return soft_reset_required;
7511}
7512
7513static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7514{
7515 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7516}
7517
7518static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7519{
7520 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7521}
7522
7523static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7524{
7525 ktime_t zero_time = ktime_set(secs: 0, nsecs: 0);
7526
7527 mutex_lock(&hdev->clk_throttling.lock);
7528
7529 switch (event_type) {
7530 case GAUDI_EVENT_FIX_POWER_ENV_S:
7531 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7532 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7533 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7534 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7535 dev_info_ratelimited(hdev->dev,
7536 "Clock throttling due to power consumption\n");
7537 break;
7538
7539 case GAUDI_EVENT_FIX_POWER_ENV_E:
7540 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7541 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7542 dev_info_ratelimited(hdev->dev,
7543 "Power envelop is safe, back to optimal clock\n");
7544 break;
7545
7546 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7547 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7548 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7549 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7550 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7551 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7552 dev_info_ratelimited(hdev->dev,
7553 "Clock throttling due to overheating\n");
7554 break;
7555
7556 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7557 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7558 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7559 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7560 dev_info_ratelimited(hdev->dev,
7561 "Thermal envelop is safe, back to optimal clock\n");
7562 break;
7563
7564 default:
7565 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7566 event_type);
7567 break;
7568 }
7569
7570 mutex_unlock(lock: &hdev->clk_throttling.lock);
7571}
7572
/*
 * gaudi_handle_eqe() - handle a single entry from the firmware event queue.
 * @hdev: habanalabs device structure.
 * @eq_entry: the event-queue entry received from the firmware.
 *
 * Decodes the event type from the entry header, updates the event statistics,
 * dispatches per-event handling (printing info, reading error registers,
 * unmasking the IRQ at the firmware), accumulates notifier bits in event_mask
 * and, for fatal events, jumps to the reset path at the bottom.
 */
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	/* Guard the statistics arrays against an out-of-range event id */
	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
			event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	/* Double-bit ECC errors: fatal, reset with FW-fatal flag */
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	/* Internal fabric/PLL failures: fatal, no razwi check */
	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_0 errors are fatal; read the ECC info then reset */
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_1 errors are non-fatal; report and re-arm the IRQ */
	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* QM error: reset without going through the FW path */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* QM error: reset without going through the FW path */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	/* Single-bit ECC errors: corrected by HW, report but don't reset */
	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	/* MMU faults are user errors, not device errors */
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	/* QMAN/engine errors: also decode the QM error registers */
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	/* Bus-monitor events: informational only */
	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	/* NIC engine status changes require no action */
	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
			event_type);
		break;
	}

	/* Non-fatal path: just notify user-space listeners */
	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	/* Fatal path: decide on the reset flavor based on FW security mode */
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		if (event_mask)
			hl_notifier_event_send_all(hdev, event_mask);
	}
}
7901
7902static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7903{
7904 struct gaudi_device *gaudi = hdev->asic_specific;
7905
7906 if (aggregate) {
7907 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7908 return gaudi->events_stat_aggregate;
7909 }
7910
7911 *size = (u32) sizeof(gaudi->events_stat);
7912 return gaudi->events_stat;
7913}
7914
/*
 * gaudi_mmu_invalidate_cache() - invalidate the MMU STLB cache.
 * @hdev: habanalabs device structure.
 * @is_hard: unused on Gaudi; both levels are always invalidated.
 * @flags: unused on Gaudi.
 *
 * Kicks an L0 & L1 STLB invalidation through the STLB registers and polls
 * until the hardware acknowledges completion.
 *
 * Return: 0 on success, error code from the poll on timeout.
 */
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	/* Nothing to do if the MMU was never set up or a hard reset is on its way */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* Palladium emulation is much slower than real silicon */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	/* Wait for the invalidation state machine to go idle (reg reads 0) */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
7947
/*
 * gaudi_mmu_invalidate_cache_range() - range flavor of MMU cache invalidation.
 * @hdev: habanalabs device structure.
 * @is_hard: forwarded to the full-invalidation callback.
 * @flags: forwarded to the full-invalidation callback.
 * @asid: unused, Gaudi cannot invalidate per-ASID.
 * @va: unused, Gaudi cannot invalidate per-range.
 * @size: unused, Gaudi cannot invalidate per-range.
 *
 * Return: result of the full cache invalidation.
 */
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
7957
/*
 * gaudi_mmu_update_asid_hop0_addr() - program an ASID's hop0 table address.
 * @hdev: habanalabs device structure.
 * @asid: the address-space id to configure.
 * @phys_addr: physical address of the hop0 page table for this ASID.
 *
 * Writes the ASID and the split halves of the hop0 physical address to the
 * MMU, then triggers the update and polls the busy bit until the hardware
 * latches the configuration.
 *
 * Return: 0 on success, poll error code on timeout.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Palladium emulation needs a much longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* hop0 address is split into bits [43:12] and [49:44] registers */
	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	/* Busy bit (MSB) clears when the MMU has consumed the new config */
	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
7989
7990static int gaudi_send_heartbeat(struct hl_device *hdev)
7991{
7992 struct gaudi_device *gaudi = hdev->asic_specific;
7993
7994 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7995 return 0;
7996
7997 return hl_fw_send_heartbeat(hdev);
7998}
7999
8000static int gaudi_cpucp_info_get(struct hl_device *hdev)
8001{
8002 struct gaudi_device *gaudi = hdev->asic_specific;
8003 struct asic_fixed_properties *prop = &hdev->asic_prop;
8004 int rc;
8005
8006 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8007 return 0;
8008
8009 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8010 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8011 mmCPU_BOOT_ERR1);
8012 if (rc)
8013 return rc;
8014
8015 if (!strlen(prop->cpucp_info.card_name))
8016 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8017 CARD_NAME_MAX_LEN);
8018
8019 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8020
8021 set_default_power_values(hdev);
8022
8023 return 0;
8024}
8025
/*
 * gaudi_is_device_idle() - check whether all compute engines are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap, a bit is set per busy engine id.
 * @mask_len: length of @mask_arr (unused here; the bitmap is cast directly).
 * @e: optional engines-data buffer; when set, a human-readable idle table
 *     is appended to it.
 *
 * Walks the DMA, TPC, MME and NIC engines, reading their QMAN/engine status
 * registers and AND-ing the per-engine idle results.
 *
 * Return: true when every checked engine is idle.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	/* DMA channels: both the QMAN and the DMA core must be idle */
	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
			"--- ------- ------------ ---------- -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	/* TPC engines: both the QMAN and the TPC config status must be idle */
	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
			"--- ------- ------------ ---------- ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	/* MME engines: slaves have no QMAN of their own */
	if (e)
		hl_engine_data_sprintf(e,
			"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
			"--- ------- ------------ ---------- -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	/* NIC engines: two QMANs per NIC macro, checked only when enabled */
	if (e)
		hl_engine_data_sprintf(e,
			"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
			"--- ------- ------------ ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8162
/* Acquire the HW queues spinlock (sparse-annotated for lock checking) */
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}
8170
/* Release the HW queues spinlock (sparse-annotated for lock checking) */
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
8178
/* Return the PCI device id of the underlying PCI device */
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
8183
8184static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8185 size_t max_size)
8186{
8187 struct gaudi_device *gaudi = hdev->asic_specific;
8188
8189 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8190 return 0;
8191
8192 return hl_fw_get_eeprom_data(hdev, data, max_size);
8193}
8194
8195static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8196{
8197 struct gaudi_device *gaudi = hdev->asic_specific;
8198
8199 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8200 return 0;
8201
8202 return hl_fw_get_monitor_dump(hdev, data);
8203}
8204
8205/*
8206 * this function should be used only during initialization and/or after reset,
8207 * when there are no active users.
8208 */
8209static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8210{
8211 u64 kernel_timeout;
8212 u32 status, offset;
8213 int rc;
8214
8215 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8216
8217 if (hdev->pldm)
8218 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8219 else
8220 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8221
8222 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8223 lower_32_bits(tpc_kernel));
8224 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8225 upper_32_bits(tpc_kernel));
8226
8227 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8228 lower_32_bits(tpc_kernel));
8229 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8230 upper_32_bits(tpc_kernel));
8231 /* set a valid LUT pointer, content is of no significance */
8232 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8233 lower_32_bits(tpc_kernel));
8234 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8235 upper_32_bits(tpc_kernel));
8236
8237 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8238 lower_32_bits(CFG_BASE +
8239 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8240
8241 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8242 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8243 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8244 /* wait a bit for the engine to start executing */
8245 usleep_range(min: 1000, max: 1500);
8246
8247 /* wait until engine has finished executing */
8248 rc = hl_poll_timeout(
8249 hdev,
8250 mmTPC0_CFG_STATUS + offset,
8251 status,
8252 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8253 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8254 1000,
8255 kernel_timeout);
8256
8257 if (rc) {
8258 dev_err(hdev->dev,
8259 "Timeout while waiting for TPC%d icache prefetch\n",
8260 tpc_id);
8261 return -EIO;
8262 }
8263
8264 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8265 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8266
8267 /* wait a bit for the engine to start executing */
8268 usleep_range(min: 1000, max: 1500);
8269
8270 /* wait until engine has finished executing */
8271 rc = hl_poll_timeout(
8272 hdev,
8273 mmTPC0_CFG_STATUS + offset,
8274 status,
8275 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8276 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8277 1000,
8278 kernel_timeout);
8279
8280 if (rc) {
8281 dev_err(hdev->dev,
8282 "Timeout while waiting for TPC%d vector pipe\n",
8283 tpc_id);
8284 return -EIO;
8285 }
8286
8287 rc = hl_poll_timeout(
8288 hdev,
8289 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8290 status,
8291 (status == 0),
8292 1000,
8293 kernel_timeout);
8294
8295 if (rc) {
8296 dev_err(hdev->dev,
8297 "Timeout while waiting for TPC%d kernel to execute\n",
8298 tpc_id);
8299 return -EIO;
8300 }
8301
8302 return 0;
8303}
8304
/*
 * gaudi_internal_cb_pool_init() - set up the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context that will own the pool mapping.
 *
 * Allocates a DMA-coherent buffer for internal (collective-wait) command
 * buffers, wraps it with a gen_pool allocator, reserves a host VA block and
 * maps the buffer into the context's MMU. Cleanup is done in reverse order
 * via the goto chain on failure.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	/* Pool is only needed when the MMU is up */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* Size a pool chunk to fit one collective CB (5 msg-short + fence) */
	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	/* Map the pool into the context's address space under the MMU lock */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
8384
/*
 * gaudi_internal_cb_pool_fini() - tear down the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context whose pool mapping is being destroyed.
 *
 * Reverses gaudi_internal_cb_pool_init(): unmap from the MMU, release the
 * reserved VA block, destroy the gen_pool and free the DMA buffer.
 */
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Pool was never created when the MMU is not up */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
8406
8407static int gaudi_ctx_init(struct hl_ctx *ctx)
8408{
8409 int rc;
8410
8411 if (ctx->asid == HL_KERNEL_ASID_ID)
8412 return 0;
8413
8414 rc = gaudi_internal_cb_pool_init(hdev: ctx->hdev, ctx);
8415 if (rc)
8416 return rc;
8417
8418 rc = gaudi_restore_user_registers(hdev: ctx->hdev);
8419 if (rc)
8420 gaudi_internal_cb_pool_fini(hdev: ctx->hdev, ctx);
8421
8422 return rc;
8423}
8424
8425static void gaudi_ctx_fini(struct hl_ctx *ctx)
8426{
8427 if (ctx->asid == HL_KERNEL_ASID_ID)
8428 return;
8429
8430 gaudi_internal_cb_pool_fini(hdev: ctx->hdev, ctx);
8431}
8432
/* No pre-scheduling work is needed for a CS on Gaudi; always succeeds */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8437
/* Map a completion-queue index to its assigned HW queue id */
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
8442
/* Size of a signal CB: one msg-short plus two protection packets */
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}
8448
/* Size of a wait CB: four msg-short, a fence and two protection packets */
static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
8455
/* Register address of a sync object; SOBs are 4 bytes apart in W_S block */
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
8460
8461static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8462 u32 size, bool eb)
8463{
8464 struct hl_cb *cb = (struct hl_cb *) data;
8465 struct packet_msg_short *pkt;
8466 u32 value, ctl, pkt_size = sizeof(*pkt);
8467
8468 pkt = cb->kernel_address + size;
8469 memset(pkt, 0, pkt_size);
8470
8471 /* Inc by 1, Mode ADD */
8472 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8473 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8474
8475 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8476 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8477 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8478 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8479 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8480 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8481 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8482
8483 pkt->value = cpu_to_le32(value);
8484 pkt->ctl = cpu_to_le32(ctl);
8485
8486 return size + pkt_size;
8487}
8488
8489static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8490 u16 addr)
8491{
8492 u32 ctl, pkt_size = sizeof(*pkt);
8493
8494 memset(pkt, 0, pkt_size);
8495
8496 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8497 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8498 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8499 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8500 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8501 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8502
8503 pkt->value = cpu_to_le32(value);
8504 pkt->ctl = cpu_to_le32(ctl);
8505
8506 return pkt_size;
8507}
8508
8509static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8510 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8511 u16 sob_val, u16 mon_id)
8512{
8513 u64 monitor_base;
8514 u32 ctl, value, pkt_size = sizeof(*pkt);
8515 u16 msg_addr_offset;
8516 u8 mask;
8517
8518 if (hl_gen_sob_mask(sob_base, sob_mask, mask: &mask)) {
8519 dev_err(hdev->dev,
8520 "sob_base %u (mask %#x) is not valid\n",
8521 sob_base, sob_mask);
8522 return 0;
8523 }
8524
8525 /*
8526 * monitor_base should be the content of the base0 address registers,
8527 * so it will be added to the msg short offsets
8528 */
8529 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8530
8531 msg_addr_offset =
8532 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8533 monitor_base;
8534
8535 memset(pkt, 0, pkt_size);
8536
8537 /* Monitor config packet: bind the monitor to a sync object */
8538 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8539 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8540 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8541 0); /* GREATER OR EQUAL*/
8542 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8543
8544 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8545 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8546 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8549 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8551
8552 pkt->value = cpu_to_le32(value);
8553 pkt->ctl = cpu_to_le32(ctl);
8554
8555 return pkt_size;
8556}
8557
8558static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8559{
8560 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8561
8562 memset(pkt, 0, pkt_size);
8563
8564 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8565 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8566 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8567
8568 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8569 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8570 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8571 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8572
8573 pkt->cfg = cpu_to_le32(cfg);
8574 pkt->ctl = cpu_to_le32(ctl);
8575
8576 return pkt_size;
8577}
8578
/*
 * gaudi_get_fence_addr() - resolve the CFG-space address of the FENCE2
 * RDATA register for a given queue.
 * @hdev: habanalabs device structure.
 * @queue_id: the HW queue whose fence register is requested.
 * @addr: out parameter; receives CFG_BASE + register offset on success.
 *
 * Only queues that may carry wait CBs are listed: DMA 0/1/5, TPC7 and the
 * NIC queues. For NIC queues the offset is computed from the queue index:
 * 4 queues per engine, 2 engines per NIC macro.
 *
 * Return: 0 on success, -EINVAL for an unsupported queue id.
 */
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		/* NIC ids are consecutive, 4 queues per engine (hence >> 2) */
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
8700
8701static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8702{
8703 u64 monitor_base;
8704 u32 size = 0;
8705 u16 msg_addr_offset;
8706
8707 /*
8708 * monitor_base should be the content of the base0 address registers,
8709 * so it will be added to the msg short offsets
8710 */
8711 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8712
8713 /* First monitor config packet: low address of the sync */
8714 msg_addr_offset =
8715 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8716 monitor_base;
8717
8718 size += gaudi_add_mon_msg_short(pkt: buf + size, value: (u32) fence_addr,
8719 addr: msg_addr_offset);
8720
8721 /* Second monitor config packet: high address of the sync */
8722 msg_addr_offset =
8723 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8724 monitor_base;
8725
8726 size += gaudi_add_mon_msg_short(pkt: buf + size, value: (u32) (fence_addr >> 32),
8727 addr: msg_addr_offset);
8728
8729 /*
8730 * Third monitor config packet: the payload, i.e. what to write when the
8731 * sync triggers
8732 */
8733 msg_addr_offset =
8734 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8735 monitor_base;
8736
8737 size += gaudi_add_mon_msg_short(pkt: buf + size, value: 1, addr: msg_addr_offset);
8738
8739 return size;
8740}
8741
8742static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8743 struct hl_gen_wait_properties *prop)
8744{
8745 struct hl_cb *cb = (struct hl_cb *) prop->data;
8746 void *buf = cb->kernel_address;
8747 u64 fence_addr = 0;
8748 u32 size = prop->size;
8749
8750 if (gaudi_get_fence_addr(hdev, queue_id: prop->q_idx, addr: &fence_addr)) {
8751 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8752 prop->q_idx);
8753 return 0;
8754 }
8755
8756 size += gaudi_add_mon_pkts(buf: buf + size, mon_id: prop->mon_id, fence_addr);
8757 size += gaudi_add_arm_monitor_pkt(hdev, pkt: buf + size, sob_base: prop->sob_base,
8758 sob_mask: prop->sob_mask, sob_val: prop->sob_val, mon_id: prop->mon_id);
8759 size += gaudi_add_fence_pkt(pkt: buf + size);
8760
8761 return size;
8762}
8763
8764static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8765{
8766 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8767
8768 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8769 hw_sob->sob_id);
8770
8771 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8772 hw_sob->sob_id * 4, 0);
8773
8774 kref_init(kref: &hw_sob->kref);
8775}
8776
/*
 * Read the 64-bit device timestamp counter: upper 32 bits first, then lower.
 * NOTE(review): reading high then low is not torn-read safe if the counter
 * wraps between the two reads — presumably acceptable here; confirm.
 */
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
8783
/* HW-block mapping is not supported on Gaudi; always refuse. */
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}
8789
/* HW-block mmap is not supported on Gaudi; always refuse. */
static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
8796
/*
 * Tell the firmware to start sending events: write the INTS_REGISTER event's
 * cpu_id to the GIC interrupt-handler register. The register address comes
 * from the static map when GIC interrupts are enabled, otherwise from the
 * dynamic registers the firmware loader negotiated.
 */
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
8808
/* Acking MMU page-fault/access errors is not supported on Gaudi. */
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
8813
8814static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8815{
8816 switch (pll_idx) {
8817 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8818 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8819 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8820 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8821 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8822 case HL_GAUDI_MME_PLL: return MME_PLL;
8823 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8824 case HL_GAUDI_IF_PLL: return IF_PLL;
8825 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8826 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8827 default: return -EINVAL;
8828 }
8829}
8830
8831static int gaudi_add_sync_to_engine_map_entry(
8832 struct hl_sync_to_engine_map *map, u32 reg_value,
8833 enum hl_sync_engine_type engine_type, u32 engine_id)
8834{
8835 struct hl_sync_to_engine_map_entry *entry;
8836
8837 /* Reg value represents a partial address of sync object,
8838 * it is used as unique identifier. For this we need to
8839 * clear the cutoff cfg base bits from the value.
8840 */
8841 if (reg_value == 0 || reg_value == 0xffffffff)
8842 return 0;
8843 reg_value -= lower_32_bits(CFG_BASE);
8844
8845 /* create a new hash entry */
8846 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8847 if (!entry)
8848 return -ENOMEM;
8849 entry->engine_type = engine_type;
8850 entry->engine_id = engine_id;
8851 entry->sync_id = reg_value;
8852 hash_add(map->tb, &entry->node, reg_value);
8853
8854 return 0;
8855}
8856
8857static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8858 struct hl_sync_to_engine_map *map)
8859{
8860 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8861 int i, j, rc;
8862 u32 reg_value;
8863
8864 /* Iterate over TPC engines */
8865 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8866
8867 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8868 sds->props[SP_NEXT_TPC] * i);
8869
8870 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8871 engine_type: ENGINE_TPC, engine_id: i);
8872 if (rc)
8873 goto free_sync_to_engine_map;
8874 }
8875
8876 /* Iterate over MME engines */
8877 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8878 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8879
8880 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8881 sds->props[SP_NEXT_MME] * i +
8882 j * sizeof(u32));
8883
8884 rc = gaudi_add_sync_to_engine_map_entry(
8885 map, reg_value, engine_type: ENGINE_MME,
8886 engine_id: i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8887 if (rc)
8888 goto free_sync_to_engine_map;
8889 }
8890 }
8891
8892 /* Iterate over DMA engines */
8893 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8894 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8895 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8896 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8897 engine_type: ENGINE_DMA, engine_id: i);
8898 if (rc)
8899 goto free_sync_to_engine_map;
8900 }
8901
8902 return 0;
8903
8904free_sync_to_engine_map:
8905 hl_state_dump_free_sync_to_engine_map(map);
8906
8907 return rc;
8908}
8909
8910static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8911{
8912 return FIELD_GET(
8913 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8914 mon->status);
8915}
8916
8917static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8918{
8919 const size_t max_write = 10;
8920 u32 gid, mask, sob;
8921 int i, offset;
8922
8923 /* Sync object ID is calculated as follows:
8924 * (8 * group_id + cleared bits in mask)
8925 */
8926 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8927 mon->arm_data);
8928 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8929 mon->arm_data);
8930
8931 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8932 max_write; mask >>= 1, i++) {
8933 if (!(mask & 1)) {
8934 sob = gid * MONITOR_MAX_SOBS + i;
8935
8936 if (offset > 0)
8937 offset += snprintf(buf: sobs + offset, size: max_write,
8938 fmt: ", ");
8939
8940 offset += snprintf(buf: sobs + offset, size: max_write, fmt: "%u", sob);
8941 }
8942 }
8943}
8944
8945static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8946 struct hl_device *hdev,
8947 struct hl_mon_state_dump *mon)
8948{
8949 const char *name;
8950 char scratch_buf1[BIN_REG_STRING_SIZE],
8951 scratch_buf2[BIN_REG_STRING_SIZE];
8952 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8953
8954 name = hl_state_dump_get_monitor_name(hdev, mon);
8955 if (!name)
8956 name = "";
8957
8958 gaudi_fill_sobs_from_mon(sobs: monitored_sobs, mon);
8959
8960 return hl_snprintf_resize(
8961 buf, size, offset,
8962 format: "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8963 mon->id, name,
8964 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8965 mon->arm_data),
8966 hl_format_as_binary(
8967 buf: scratch_buf1, buf_len: sizeof(scratch_buf1),
8968 FIELD_GET(
8969 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8970 mon->arm_data)),
8971 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8972 mon->arm_data),
8973 mon->wr_data,
8974 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8975 hl_format_as_binary(
8976 buf: scratch_buf2, buf_len: sizeof(scratch_buf2),
8977 FIELD_GET(
8978 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8979 mon->status)),
8980 monitored_sobs);
8981}
8982
8983
8984static int gaudi_print_fences_single_engine(
8985 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8986 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8987 size_t *size, size_t *offset)
8988{
8989 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8990 int rc = -ENOMEM, i;
8991 u32 *statuses, *fences;
8992
8993 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8994 sizeof(*statuses), GFP_KERNEL);
8995 if (!statuses)
8996 goto out;
8997
8998 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8999 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9000 sizeof(*fences), GFP_KERNEL);
9001 if (!fences)
9002 goto free_status;
9003
9004 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9005 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9006
9007 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9008 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9009 fences[i] = RREG32(base_offset + i * sizeof(u32));
9010
9011 /* The actual print */
9012 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9013 u32 fence_id;
9014 u64 fence_cnt, fence_rdata;
9015 const char *engine_name;
9016
9017 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9018 statuses[i]))
9019 continue;
9020
9021 fence_id =
9022 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9023 fence_cnt = base_offset + CFG_BASE +
9024 sizeof(u32) *
9025 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9026 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9027 sds->props[SP_FENCE0_RDATA_OFFSET];
9028 engine_name = hl_sync_engine_to_string(engine_type);
9029
9030 rc = hl_snprintf_resize(
9031 buf, size, offset,
9032 format: "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9033 engine_name, engine_id,
9034 i, fence_id,
9035 fence_cnt, engine_name, engine_id, fence_id, i,
9036 fence_rdata, engine_name, engine_id, fence_id, i,
9037 fences[fence_id],
9038 statuses[i]);
9039 if (rc)
9040 goto free_fences;
9041 }
9042
9043 rc = 0;
9044
9045free_fences:
9046 kfree(objp: fences);
9047free_status:
9048 kfree(objp: statuses);
9049out:
9050 return rc;
9051}
9052
9053
/* Gaudi callbacks for the common state-dump infrastructure */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9060
/*
 * gaudi_state_dump_init() - wire up the Gaudi-specific state-dump tables:
 * id-to-name hash tables for SOBs and monitors, the properties array, the
 * sync-manager names and the callback set.
 */
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
9082
/* Expose the static array of stream-master queue ids */
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
9087
/* No runtime DRAM property adjustment is needed on Gaudi. */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9092
/* Engine binning is not applicable to Gaudi; nothing to set. */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9097
/* RAZWI polling is handled elsewhere on Gaudi; intentionally empty. */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9101
9102static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9103{
9104 struct hl_device *hdev = dev_get_drvdata(dev);
9105 struct cpucp_info *cpucp_info;
9106
9107 cpucp_info = &hdev->asic_prop.cpucp_info;
9108
9109 return sprintf(buf, fmt: "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9110}
9111
9112static DEVICE_ATTR_RO(infineon_ver);
9113
/* Attributes exported under the voltage-regulator sysfs group */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9118
/*
 * Register Gaudi's sysfs attribute groups: the common clock attributes and
 * the VRM (Infineon version) attributes.
 */
static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
9125
/* Device-activity notification to FW is not needed on Gaudi; no-op. */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9130
/*
 * ASIC function table for Gaudi. Entries set to NULL are features the
 * common driver checks for and that Gaudi does not implement.
 */
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};
9227
9228/**
9229 * gaudi_set_asic_funcs - set GAUDI function pointers
9230 *
9231 * @hdev: pointer to hl_device structure
9232 *
9233 */
9234void gaudi_set_asic_funcs(struct hl_device *hdev)
9235{
9236 hdev->asic_funcs = &gaudi_funcs;
9237}
9238

source code of linux/drivers/accel/habanalabs/gaudi/gaudi.c