1 | /* |
2 | * Copyright (C) 2007 Ben Skeggs. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining |
6 | * a copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sublicense, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice (including the |
14 | * next paragraph) shall be included in all copies or substantial |
15 | * portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
20 | * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
21 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
22 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
23 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
24 | * |
25 | */ |
26 | |
27 | #include "nouveau_drv.h" |
28 | #include "nouveau_dma.h" |
29 | #include "nouveau_vmm.h" |
30 | |
31 | #include <nvif/user.h> |
32 | |
33 | /* Fetch and adjust GPU GET pointer |
34 | * |
35 | * Returns: |
36 | * value >= 0, the adjusted GET pointer |
37 | * -EINVAL if GET pointer currently outside main push buffer |
38 | * -EBUSY if timeout exceeded |
39 | */ |
40 | static inline int |
41 | READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) |
42 | { |
43 | uint64_t val; |
44 | |
45 | val = nvif_rd32(chan->userd, chan->user_get); |
46 | if (chan->user_get_hi) |
47 | val |= (uint64_t)nvif_rd32(chan->userd, chan->user_get_hi) << 32; |
48 | |
49 | /* reset counter as long as GET is still advancing, this is |
50 | * to avoid misdetecting a GPU lockup if the GPU happens to |
51 | * just be processing an operation that takes a long time |
52 | */ |
53 | if (val != *prev_get) { |
54 | *prev_get = val; |
55 | *timeout = 0; |
56 | } |
57 | |
58 | if ((++*timeout & 0xff) == 0) { |
59 | udelay(1); |
60 | if (*timeout > 100000) |
61 | return -EBUSY; |
62 | } |
63 | |
64 | if (val < chan->push.addr || |
65 | val > chan->push.addr + (chan->dma.max << 2)) |
66 | return -EINVAL; |
67 | |
68 | return (val - chan->push.addr) >> 2; |
69 | } |
70 | |
71 | void |
72 | nv50_dma_push(struct nouveau_channel *chan, u64 offset, u32 length, |
73 | bool no_prefetch) |
74 | { |
75 | struct nvif_user *user = &chan->drm->client.device.user; |
76 | struct nouveau_bo *pb = chan->push.buffer; |
77 | int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; |
78 | |
79 | BUG_ON(chan->dma.ib_free < 1); |
80 | WARN_ON(length > NV50_DMA_PUSH_MAX_LENGTH); |
81 | |
82 | nouveau_bo_wr32(pb, index: ip++, lower_32_bits(offset)); |
83 | nouveau_bo_wr32(pb, index: ip++, upper_32_bits(offset) | length << 8 | |
84 | (no_prefetch ? (1 << 31) : 0)); |
85 | |
86 | chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; |
87 | |
88 | mb(); |
89 | /* Flush writes. */ |
90 | nouveau_bo_rd32(pb, index: 0); |
91 | |
92 | nvif_wr32(chan->userd, 0x8c, chan->dma.ib_put); |
93 | if (user->func && user->func->doorbell) |
94 | user->func->doorbell(user, chan->token); |
95 | chan->dma.ib_free--; |
96 | } |
97 | |
98 | static int |
99 | nv50_dma_push_wait(struct nouveau_channel *chan, int count) |
100 | { |
101 | uint32_t cnt = 0, prev_get = 0; |
102 | |
103 | while (chan->dma.ib_free < count) { |
104 | uint32_t get = nvif_rd32(chan->userd, 0x88); |
105 | if (get != prev_get) { |
106 | prev_get = get; |
107 | cnt = 0; |
108 | } |
109 | |
110 | if ((++cnt & 0xff) == 0) { |
111 | udelay(1); |
112 | if (cnt > 100000) |
113 | return -EBUSY; |
114 | } |
115 | |
116 | chan->dma.ib_free = get - chan->dma.ib_put; |
117 | if (chan->dma.ib_free <= 0) |
118 | chan->dma.ib_free += chan->dma.ib_max; |
119 | } |
120 | |
121 | return 0; |
122 | } |
123 | |
124 | static int |
125 | nv50_dma_wait(struct nouveau_channel *chan, int slots, int count) |
126 | { |
127 | uint64_t prev_get = 0; |
128 | int ret, cnt = 0; |
129 | |
130 | ret = nv50_dma_push_wait(chan, count: slots + 1); |
131 | if (unlikely(ret)) |
132 | return ret; |
133 | |
134 | while (chan->dma.free < count) { |
135 | int get = READ_GET(chan, prev_get: &prev_get, timeout: &cnt); |
136 | if (unlikely(get < 0)) { |
137 | if (get == -EINVAL) |
138 | continue; |
139 | |
140 | return get; |
141 | } |
142 | |
143 | if (get <= chan->dma.cur) { |
144 | chan->dma.free = chan->dma.max - chan->dma.cur; |
145 | if (chan->dma.free >= count) |
146 | break; |
147 | |
148 | FIRE_RING(chan); |
149 | do { |
150 | get = READ_GET(chan, prev_get: &prev_get, timeout: &cnt); |
151 | if (unlikely(get < 0)) { |
152 | if (get == -EINVAL) |
153 | continue; |
154 | return get; |
155 | } |
156 | } while (get == 0); |
157 | chan->dma.cur = 0; |
158 | chan->dma.put = 0; |
159 | } |
160 | |
161 | chan->dma.free = get - chan->dma.cur - 1; |
162 | } |
163 | |
164 | return 0; |
165 | } |
166 | |
167 | int |
168 | nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size) |
169 | { |
170 | uint64_t prev_get = 0; |
171 | int cnt = 0, get; |
172 | |
173 | if (chan->dma.ib_max) |
174 | return nv50_dma_wait(chan, slots, count: size); |
175 | |
176 | while (chan->dma.free < size) { |
177 | get = READ_GET(chan, prev_get: &prev_get, timeout: &cnt); |
178 | if (unlikely(get == -EBUSY)) |
179 | return -EBUSY; |
180 | |
181 | /* loop until we have a usable GET pointer. the value |
182 | * we read from the GPU may be outside the main ring if |
183 | * PFIFO is processing a buffer called from the main ring, |
184 | * discard these values until something sensible is seen. |
185 | * |
186 | * the other case we discard GET is while the GPU is fetching |
187 | * from the SKIPS area, so the code below doesn't have to deal |
188 | * with some fun corner cases. |
189 | */ |
190 | if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS) |
191 | continue; |
192 | |
193 | if (get <= chan->dma.cur) { |
194 | /* engine is fetching behind us, or is completely |
195 | * idle (GET == PUT) so we have free space up until |
196 | * the end of the push buffer |
197 | * |
198 | * we can only hit that path once per call due to |
199 | * looping back to the beginning of the push buffer, |
200 | * we'll hit the fetching-ahead-of-us path from that |
201 | * point on. |
202 | * |
203 | * the *one* exception to that rule is if we read |
204 | * GET==PUT, in which case the below conditional will |
205 | * always succeed and break us out of the wait loop. |
206 | */ |
207 | chan->dma.free = chan->dma.max - chan->dma.cur; |
208 | if (chan->dma.free >= size) |
209 | break; |
210 | |
211 | /* not enough space left at the end of the push buffer, |
212 | * instruct the GPU to jump back to the start right |
213 | * after processing the currently pending commands. |
214 | */ |
215 | OUT_RING(chan, data: chan->push.addr | 0x20000000); |
216 | |
217 | /* wait for GET to depart from the skips area. |
218 | * prevents writing GET==PUT and causing a race |
219 | * condition that causes us to think the GPU is |
220 | * idle when it's not. |
221 | */ |
222 | do { |
223 | get = READ_GET(chan, prev_get: &prev_get, timeout: &cnt); |
224 | if (unlikely(get == -EBUSY)) |
225 | return -EBUSY; |
226 | if (unlikely(get == -EINVAL)) |
227 | continue; |
228 | } while (get <= NOUVEAU_DMA_SKIPS); |
229 | WRITE_PUT(NOUVEAU_DMA_SKIPS); |
230 | |
231 | /* we're now submitting commands at the start of |
232 | * the push buffer. |
233 | */ |
234 | chan->dma.cur = |
235 | chan->dma.put = NOUVEAU_DMA_SKIPS; |
236 | } |
237 | |
238 | /* engine fetching ahead of us, we have space up until the |
239 | * current GET pointer. the "- 1" is to ensure there's |
240 | * space left to emit a jump back to the beginning of the |
241 | * push buffer if we require it. we can never get GET == PUT |
242 | * here, so this is safe. |
243 | */ |
244 | chan->dma.free = get - chan->dma.cur - 1; |
245 | } |
246 | |
247 | return 0; |
248 | } |
249 | |
250 | |