| 1 | /* SPDX-License-Identifier: GPL-2.0-only or MIT */ |
| 2 | /* Copyright 2025 Arm, Ltd. */ |
| 3 | |
| 4 | #ifndef __ETHOSU_DEVICE_H__ |
| 5 | #define __ETHOSU_DEVICE_H__ |
| 6 | |
| 7 | #include <linux/bitfield.h> |
| 8 | #include <linux/bits.h> |
| 9 | #include <linux/types.h> |
| 10 | |
| 11 | #include <drm/drm_device.h> |
| 12 | #include <drm/gpu_scheduler.h> |
| 13 | |
| 14 | #include <drm/ethosu_accel.h> |
| 15 | |
| 16 | struct clk; |
| 17 | struct gen_pool; |
| 18 | |
/*
 * NPU register offsets (NPU_REG_*).
 *
 * Offsets 0x0040-0x004c appear twice on purpose: the same offsets are
 * named AXILIMITn for U65 and MEM_ATTRn for U85.
 */
#define NPU_REG_ID		0x0000
#define NPU_REG_STATUS		0x0004
#define NPU_REG_CMD		0x0008
#define NPU_REG_RESET		0x000c
#define NPU_REG_QBASE		0x0010
#define NPU_REG_QBASE_HI	0x0014
#define NPU_REG_QREAD		0x0018
#define NPU_REG_QCONFIG		0x001c
#define NPU_REG_QSIZE		0x0020
#define NPU_REG_PROT		0x0024
#define NPU_REG_CONFIG		0x0028
#define NPU_REG_REGIONCFG	0x003c

/* U65 names */
#define NPU_REG_AXILIMIT0	0x0040
#define NPU_REG_AXILIMIT1	0x0044
#define NPU_REG_AXILIMIT2	0x0048
#define NPU_REG_AXILIMIT3	0x004c

/* U85 names */
#define NPU_REG_MEM_ATTR0	0x0040
#define NPU_REG_MEM_ATTR1	0x0044
#define NPU_REG_MEM_ATTR2	0x0048
#define NPU_REG_MEM_ATTR3	0x004c
#define NPU_REG_AXI_SRAM	0x0050
#define NPU_REG_AXI_EXT		0x0054

/*
 * BASEP registers, indexed by x: each index takes 8 bytes starting at
 * 0x0080, and NPU_REG_BASEP_HI(x) sits 4 bytes after NPU_REG_BASEP(x).
 */
#define NPU_REG_BASEP(x)	(0x0080 + (x) * 8)
#define NPU_REG_BASEP_HI(x)	(NPU_REG_BASEP(x) + 4)
#define NPU_BASEP_REGION_MAX	8
| 45 | |
/*
 * Bitfield and bit definitions. Each group's prefix matches the name of
 * one of the NPU_REG_* offsets (ID, CONFIG, STATUS, CMD, RESET, PROT).
 */

/* ID_* field masks */
#define ID_ARCH_MAJOR_MASK		GENMASK(31, 28)
#define ID_ARCH_MINOR_MASK		GENMASK(27, 20)
#define ID_ARCH_PATCH_MASK		GENMASK(19, 16)
#define ID_VER_MAJOR_MASK		GENMASK(11, 8)
#define ID_VER_MINOR_MASK		GENMASK(7, 4)

/* CONFIG_* field masks */
#define CONFIG_MACS_PER_CC_MASK		GENMASK(3, 0)
#define CONFIG_CMD_STREAM_VER_MASK	GENMASK(7, 4)

/* STATUS_* bits */
#define STATUS_STATE_RUNNING		BIT(0)
#define STATUS_IRQ_RAISED		BIT(1)
#define STATUS_BUS_STATUS		BIT(2)
#define STATUS_RESET_STATUS		BIT(3)
#define STATUS_CMD_PARSE_ERR		BIT(4)
#define STATUS_CMD_END_REACHED		BIT(5)

/* CMD_* bits */
#define CMD_TRANSITION_TO_RUN		BIT(0)
#define CMD_CLEAR_IRQ			BIT(1)

/* RESET_* bits */
#define RESET_PENDING_CPL		BIT(0)
#define RESET_PENDING_CSL		BIT(1)

/* PROT_* bits */
#define PROT_ACTIVE_CSL			BIT(1)
| 69 | |
/*
 * Command opcodes, grouped below by numeric range:
 *   0x002-0x010    NPU_OP_*  commands
 *   0x100-0x18f    NPU_SET_* commands
 *   0x4000-0x4095  NPU_SET_* commands
 *
 * Two values (0x4092, 0x4093) intentionally have two names each.
 */
enum ethosu_cmds {
	/* NPU_OP_* */
	NPU_OP_CONV			= 0x2,
	NPU_OP_DEPTHWISE		= 0x3,
	NPU_OP_POOL			= 0x5,
	NPU_OP_ELEMENTWISE		= 0x6,
	NPU_OP_RESIZE			= 0x7,	/* U85 only */
	NPU_OP_DMA_START		= 0x10,

	/* NPU_SET_*, 0x100 range: IFM */
	NPU_SET_IFM_PAD_TOP		= 0x100,
	NPU_SET_IFM_PAD_LEFT		= 0x101,
	NPU_SET_IFM_PAD_RIGHT		= 0x102,
	NPU_SET_IFM_PAD_BOTTOM		= 0x103,
	NPU_SET_IFM_DEPTH_M1		= 0x104,
	NPU_SET_IFM_PRECISION		= 0x105,
	NPU_SET_IFM_BROADCAST		= 0x108,
	NPU_SET_IFM_WIDTH0_M1		= 0x10a,
	NPU_SET_IFM_HEIGHT0_M1		= 0x10b,
	NPU_SET_IFM_HEIGHT1_M1		= 0x10c,
	NPU_SET_IFM_REGION		= 0x10f,

	/* NPU_SET_*, 0x110 range: OFM */
	NPU_SET_OFM_WIDTH_M1		= 0x111,
	NPU_SET_OFM_HEIGHT_M1		= 0x112,
	NPU_SET_OFM_DEPTH_M1		= 0x113,
	NPU_SET_OFM_PRECISION		= 0x114,
	NPU_SET_OFM_WIDTH0_M1		= 0x11a,
	NPU_SET_OFM_HEIGHT0_M1		= 0x11b,
	NPU_SET_OFM_HEIGHT1_M1		= 0x11c,
	NPU_SET_OFM_REGION		= 0x11f,

	/* NPU_SET_*, 0x120 range: kernel, weight and scale */
	NPU_SET_KERNEL_WIDTH_M1		= 0x120,
	NPU_SET_KERNEL_HEIGHT_M1	= 0x121,
	NPU_SET_KERNEL_STRIDE		= 0x122,
	NPU_SET_WEIGHT_REGION		= 0x128,
	NPU_SET_SCALE_REGION		= 0x129,

	/* NPU_SET_*, 0x130 range: DMA0 */
	NPU_SET_DMA0_SRC_REGION		= 0x130,
	NPU_SET_DMA0_DST_REGION		= 0x131,
	NPU_SET_DMA0_SIZE0		= 0x132,
	NPU_SET_DMA0_SIZE1		= 0x133,

	/* NPU_SET_*, 0x180 range: IFM2 */
	NPU_SET_IFM2_BROADCAST		= 0x180,
	NPU_SET_IFM2_PRECISION		= 0x185,
	NPU_SET_IFM2_WIDTH0_M1		= 0x18a,
	NPU_SET_IFM2_HEIGHT0_M1		= 0x18b,
	NPU_SET_IFM2_HEIGHT1_M1		= 0x18c,
	NPU_SET_IFM2_REGION		= 0x18f,

	/* NPU_SET_*, 0x4000 range: IFM */
	NPU_SET_IFM_BASE0		= 0x4000,
	NPU_SET_IFM_BASE1		= 0x4001,
	NPU_SET_IFM_BASE2		= 0x4002,
	NPU_SET_IFM_BASE3		= 0x4003,
	NPU_SET_IFM_STRIDE_X		= 0x4004,
	NPU_SET_IFM_STRIDE_Y		= 0x4005,
	NPU_SET_IFM_STRIDE_C		= 0x4006,

	/* NPU_SET_*, 0x4010 range: OFM */
	NPU_SET_OFM_BASE0		= 0x4010,
	NPU_SET_OFM_BASE1		= 0x4011,
	NPU_SET_OFM_BASE2		= 0x4012,
	NPU_SET_OFM_BASE3		= 0x4013,
	NPU_SET_OFM_STRIDE_X		= 0x4014,
	NPU_SET_OFM_STRIDE_Y		= 0x4015,
	NPU_SET_OFM_STRIDE_C		= 0x4016,

	/* NPU_SET_*, 0x4020 range: weight and scale */
	NPU_SET_WEIGHT_BASE		= 0x4020,
	NPU_SET_WEIGHT_LENGTH		= 0x4021,
	NPU_SET_SCALE_BASE		= 0x4022,
	NPU_SET_SCALE_LENGTH		= 0x4023,

	/* NPU_SET_*, 0x4030 range: DMA0 */
	NPU_SET_DMA0_SRC		= 0x4030,
	NPU_SET_DMA0_DST		= 0x4031,
	NPU_SET_DMA0_LEN		= 0x4032,
	NPU_SET_DMA0_SRC_STRIDE0	= 0x4033,
	NPU_SET_DMA0_SRC_STRIDE1	= 0x4034,
	NPU_SET_DMA0_DST_STRIDE0	= 0x4035,
	NPU_SET_DMA0_DST_STRIDE1	= 0x4036,

	/* NPU_SET_*, 0x4080 range: IFM2 */
	NPU_SET_IFM2_BASE0		= 0x4080,
	NPU_SET_IFM2_BASE1		= 0x4081,
	NPU_SET_IFM2_BASE2		= 0x4082,
	NPU_SET_IFM2_BASE3		= 0x4083,
	NPU_SET_IFM2_STRIDE_X		= 0x4084,
	NPU_SET_IFM2_STRIDE_Y		= 0x4085,
	NPU_SET_IFM2_STRIDE_C		= 0x4086,

	/*
	 * NPU_SET_*, 0x4090 range: additional weight/scale streams.
	 *
	 * SCALE1_* and WEIGHT2_* are two names for the same values.
	 * NOTE(review): presumably the U65 and U85 names for one opcode,
	 * confirm against the command stream specification.
	 */
	NPU_SET_WEIGHT1_BASE		= 0x4090,
	NPU_SET_WEIGHT1_LENGTH		= 0x4091,
	NPU_SET_SCALE1_BASE		= 0x4092,
	NPU_SET_WEIGHT2_BASE		= 0x4092,
	NPU_SET_SCALE1_LENGTH		= 0x4093,
	NPU_SET_WEIGHT2_LENGTH		= 0x4093,
	NPU_SET_WEIGHT3_BASE		= 0x4094,
	NPU_SET_WEIGHT3_LENGTH		= 0x4095,
};
| 152 | |
/* Matching Vela compiler */
#define ETHOSU_SRAM_REGION	2
| 154 | |
| 155 | /** |
| 156 | * struct ethosu_device - Ethosu device |
| 157 | */ |
| 158 | struct ethosu_device { |
| 159 | /** @base: Base drm_device. */ |
| 160 | struct drm_device base; |
| 161 | |
| 162 | /** @iomem: CPU mapping of the registers. */ |
| 163 | void __iomem *regs; |
| 164 | |
| 165 | void __iomem *sram; |
| 166 | struct gen_pool *srampool; |
| 167 | dma_addr_t sramphys; |
| 168 | |
| 169 | struct clk_bulk_data *clks; |
| 170 | int num_clks; |
| 171 | int irq; |
| 172 | |
| 173 | struct drm_ethosu_npu_info npu_info; |
| 174 | |
| 175 | struct ethosu_job *in_flight_job; |
| 176 | /* For in_flight_job and ethosu_job_hw_submit() */ |
| 177 | struct mutex job_lock; |
| 178 | |
| 179 | /* For dma_fence */ |
| 180 | spinlock_t fence_lock; |
| 181 | |
| 182 | struct drm_gpu_scheduler sched; |
| 183 | /* For ethosu_job_do_push() */ |
| 184 | struct mutex sched_lock; |
| 185 | u64 fence_context; |
| 186 | u64 emit_seqno; |
| 187 | }; |
| 188 | |
/*
 * Map a pointer to the embedded drm_device (the "base" member) back to the
 * struct ethosu_device that contains it.
 */
#define to_ethosu_device(drm_dev) \
	(container_of(drm_dev, struct ethosu_device, base))
| 191 | |
| 192 | static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev) |
| 193 | { |
| 194 | return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1; |
| 195 | } |
| 196 | |
| 197 | #endif |
| 198 | |