// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/memregion.h>
#include <linux/genalloc.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/slab.h>
#include <linux/uuid.h>
#include <linux/sort.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"

/**
 * DOC: cxl core region
 *
 * CXL Regions represent mapped memory capacity in system physical address
 * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
 * Memory ranges, Regions represent the active mapped capacity by the HDM
 * Decoder Capability structures throughout the Host Bridges, Switches, and
 * Endpoints in the topology.
 *
 * Region configuration has ordering constraints. UUID may be set at any time
 * but is only visible for persistent regions.
 * 1. Interleave granularity
 * 2. Interleave size
 * 3. Decoder targets
 */
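
/*
 * Example: a minimal sketch of the sysfs flow to assemble and commit a
 * x1 pmem region (decoder and region names are illustrative; see
 * Documentation/ABI/testing/sysfs-bus-cxl for the authoritative ABI):
 *
 *   region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)
 *   echo $region > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
 *   echo 1 > /sys/bus/cxl/devices/$region/interleave_ways
 *   uuidgen > /sys/bus/cxl/devices/$region/uuid
 *   echo $((256 << 20)) > /sys/bus/cxl/devices/$region/size
 *   echo decoder2.0 > /sys/bus/cxl/devices/$region/target0
 *   echo 1 > /sys/bus/cxl/devices/$region/commit
 */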

static struct cxl_region *to_cxl_region(struct device *dev);

#define __ACCESS_ATTR_RO(_level, _name) {				\
	.attr = { .name = __stringify(_name), .mode = 0444 },		\
	.show = _name##_access##_level##_show,				\
}

#define ACCESS_DEVICE_ATTR_RO(level, name)	\
	struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)

#define ACCESS_ATTR_RO(level, attrib)					\
static ssize_t attrib##_access##level##_show(struct device *dev,	\
					     struct device_attribute *attr, \
					     char *buf)			\
{									\
	struct cxl_region *cxlr = to_cxl_region(dev);			\
									\
	if (cxlr->coord[level].attrib == 0)				\
		return -ENOENT;						\
									\
	return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);	\
}									\
static ACCESS_DEVICE_ATTR_RO(level, attrib)

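/*
 * Instantiating, for example, ACCESS_ATTR_RO(0, read_bandwidth) below
 * defines read_bandwidth_access0_show() and the device attribute
 * dev_attr_access0_read_bandwidth that the "access0" group references.
 */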
ACCESS_ATTR_RO(0, read_bandwidth);
ACCESS_ATTR_RO(0, read_latency);
ACCESS_ATTR_RO(0, write_bandwidth);
ACCESS_ATTR_RO(0, write_latency);

#define ACCESS_ATTR_DECLARE(level, attrib)	\
	(&dev_attr_access##level##_##attrib.attr)

static struct attribute *access0_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(0, read_bandwidth),
	ACCESS_ATTR_DECLARE(0, write_bandwidth),
	ACCESS_ATTR_DECLARE(0, read_latency),
	ACCESS_ATTR_DECLARE(0, write_latency),
	NULL
};

ACCESS_ATTR_RO(1, read_bandwidth);
ACCESS_ATTR_RO(1, read_latency);
ACCESS_ATTR_RO(1, write_bandwidth);
ACCESS_ATTR_RO(1, write_latency);

static struct attribute *access1_coordinate_attrs[] = {
	ACCESS_ATTR_DECLARE(1, read_bandwidth),
	ACCESS_ATTR_DECLARE(1, write_bandwidth),
	ACCESS_ATTR_DECLARE(1, read_latency),
	ACCESS_ATTR_DECLARE(1, write_latency),
	NULL
};

#define ACCESS_VISIBLE(level)						\
static umode_t cxl_region_access##level##_coordinate_visible(		\
		struct kobject *kobj, struct attribute *a, int n)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct cxl_region *cxlr = to_cxl_region(dev);			\
									\
	if (a == &dev_attr_access##level##_read_latency.attr &&		\
	    cxlr->coord[level].read_latency == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_write_latency.attr &&	\
	    cxlr->coord[level].write_latency == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_read_bandwidth.attr &&	\
	    cxlr->coord[level].read_bandwidth == 0)			\
		return 0;						\
									\
	if (a == &dev_attr_access##level##_write_bandwidth.attr &&	\
	    cxlr->coord[level].write_bandwidth == 0)			\
		return 0;						\
									\
	return a->mode;							\
}

ACCESS_VISIBLE(0);
ACCESS_VISIBLE(1);

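/*
 * The .name fields below create "access0" and "access1" subdirectories
 * under each region device, mirroring the generic node "accessN" sysfs
 * convention: class 0 coordinates are relative to the nearest initiator,
 * class 1 relative to the nearest CPU.
 */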
static const struct attribute_group cxl_region_access0_coordinate_group = {
	.name = "access0",
	.attrs = access0_coordinate_attrs,
	.is_visible = cxl_region_access0_coordinate_visible,
};

static const struct attribute_group *get_cxl_region_access0_group(void)
{
	return &cxl_region_access0_coordinate_group;
}

static const struct attribute_group cxl_region_access1_coordinate_group = {
	.name = "access1",
	.attrs = access1_coordinate_attrs,
	.is_visible = cxl_region_access1_coordinate_visible,
};

static const struct attribute_group *get_cxl_region_access1_group(void)
{
	return &cxl_region_access1_coordinate_group;
}

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (cxlr->mode != CXL_DECODER_PMEM)
		rc = sysfs_emit(buf, "\n");
	else
		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
	up_read(&cxl_region_rwsem);

	return rc;
}

static int is_dup(struct device *match, void *data)
{
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	uuid_t *uuid = data;

	if (!is_cxl_region(match))
		return 0;

	lockdep_assert_held(&cxl_region_rwsem);
	cxlr = to_cxl_region(match);
	p = &cxlr->params;

	if (uuid_equal(&p->uuid, uuid)) {
		dev_dbg(match, "already has uuid: %pUb\n", uuid);
		return -EBUSY;
	}

	return 0;
}

static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	uuid_t temp;
	ssize_t rc;

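	/* Require a complete 36-character UUID string plus a trailing newline */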
	if (len != UUID_STRING_LEN + 1)
		return -EINVAL;

	rc = uuid_parse(buf, &temp);
	if (rc)
		return rc;

	if (uuid_is_null(&temp))
		return -EINVAL;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (uuid_equal(&p->uuid, &temp))
		goto out;

	rc = -EBUSY;
	if (p->state >= CXL_CONFIG_ACTIVE)
		goto out;

	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
	if (rc < 0)
		goto out;

	uuid_copy(&p->uuid, &temp);
out:
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(uuid);

static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
					  struct cxl_region *cxlr)
{
	return xa_load(&port->regions, (unsigned long)cxlr);
}

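/*
 * Region address space may previously have hosted different contents, so
 * any stale CPU cache lines for the physical range must be flushed (or
 * the operation failed) before the capacity is (re)used.
 */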
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
	if (!cpu_cache_has_invalidate_memregion()) {
		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
			dev_info_once(
				&cxlr->dev,
				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
			return 0;
		} else {
			dev_err(&cxlr->dev,
				"Failed to synchronize CPU cache state\n");
			return -ENXIO;
		}
	}

	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
	return 0;
}

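/*
 * Reset decoders in the reverse of commit order: for each endpoint walk
 * from the topmost port below the root back down toward the endpoint,
 * resetting the switch decoders along the way, and reset the endpoint
 * decoder itself last.
 */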
static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	/*
	 * Before region teardown attempt to flush, and if the flush
	 * fails cancel the region teardown for data consistency
	 * concerns
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		return rc;

	for (i = count - 1; i >= 0; i--) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_port *iter = cxled_to_port(cxled);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct cxl_ep *ep;

		if (cxlds->rcd)
			goto endpoint_reset;

		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
			struct cxl_region_ref *cxl_rr;
			struct cxl_decoder *cxld;

			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			if (cxld->reset)
				rc = cxld->reset(cxld);
			if (rc)
				return rc;
			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
		}

endpoint_reset:
		rc = cxled->cxld.reset(&cxled->cxld);
		if (rc)
			return rc;
		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
	}

	/* all decoders associated with this region have been torn down */
	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);

	return 0;
}

static int commit_decoder(struct cxl_decoder *cxld)
{
	struct cxl_switch_decoder *cxlsd = NULL;

	if (cxld->commit)
		return cxld->commit(cxld);

	if (is_switch_decoder(&cxld->dev))
		cxlsd = to_cxl_switch_decoder(&cxld->dev);

	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
			  "->commit() is required\n"))
		return -ENXIO;
	return 0;
}

static int cxl_region_decode_commit(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_region_ref *cxl_rr;
		struct cxl_decoder *cxld;
		struct cxl_port *iter;
		struct cxl_ep *ep;

		/* commit bottom up */
		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
		     iter = to_cxl_port(iter->dev.parent)) {
			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			rc = commit_decoder(cxld);
			if (rc)
				break;
		}

		if (rc) {
			/* programming @iter failed, teardown */
			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
				cxl_rr = cxl_rr_load(iter, cxlr);
				cxld = cxl_rr->decoder;
				if (cxld->reset)
					cxld->reset(cxld);
			}

			cxled->cxld.reset(&cxled->cxld);
			goto err;
		}
	}

	return 0;

err:
	/* undo the targets that were successfully committed */
	cxl_region_decode_reset(cxlr, i);
	return rc;
}

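/*
 * Writing 1 commits the region's decoder programming throughout the
 * topology; writing 0 queues a reset, detaches the region driver with the
 * lock dropped, and then performs the decoder reset if it is still pending
 * once the lock is reacquired.
 */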
static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	bool commit;
	ssize_t rc;

	rc = kstrtobool(buf, &commit);
	if (rc)
		return rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	/* Already in the requested state? */
	if (commit && p->state >= CXL_CONFIG_COMMIT)
		goto out;
	if (!commit && p->state < CXL_CONFIG_COMMIT)
		goto out;

	/* Not ready to commit? */
	if (commit && p->state < CXL_CONFIG_ACTIVE) {
		rc = -ENXIO;
		goto out;
	}

	/*
	 * Invalidate caches before region setup to drop any speculative
	 * consumption of this address space
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		goto out;

	if (commit) {
		rc = cxl_region_decode_commit(cxlr);
		if (rc == 0)
			p->state = CXL_CONFIG_COMMIT;
	} else {
		p->state = CXL_CONFIG_RESET_PENDING;
		up_write(&cxl_region_rwsem);
		device_release_driver(&cxlr->dev);
		down_write(&cxl_region_rwsem);

		/*
		 * The lock was dropped, so need to revalidate that the reset is
		 * still pending.
		 */
		if (p->state == CXL_CONFIG_RESET_PENDING) {
			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
			/*
			 * Revert to committed since there may still be active
			 * decoders associated with this region, or move forward
			 * to active to mark the reset successful
			 */
			if (rc)
				p->state = CXL_CONFIG_COMMIT;
			else
				p->state = CXL_CONFIG_ACTIVE;
		}
	}

out:
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}

static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(commit);

static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cxl_region *cxlr = to_cxl_region(dev);

	/*
	 * Support tooling that expects to find a 'uuid' attribute for all
	 * regions regardless of mode.
	 */
	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
		return 0444;
	return a->mode;
}

static ssize_t interleave_ways_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
	up_read(&cxl_region_rwsem);

	return rc;
}

static const struct attribute_group *get_cxl_region_target_group(void);

static ssize_t interleave_ways_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	unsigned int val, save;
	int rc;
	u8 iw;

	rc = kstrtouint(buf, 0, &val);
	if (rc)
		return rc;

	rc = ways_to_eiw(val, &iw);
	if (rc)
		return rc;

	/*
	 * Even for x3, x6, and x12 interleaves the region interleave must be a
	 * power of 2 multiple of the host bridge interleave, e.g. a x3 host
	 * bridge supports x3, x6, and x12 regions, but not x9.
	 */
	if (!is_power_of_2(val / cxld->interleave_ways) ||
	    (val % cxld->interleave_ways)) {
		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
		return -EINVAL;
	}

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
		rc = -EBUSY;
		goto out;
	}

	save = p->interleave_ways;
	p->interleave_ways = val;
	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
	if (rc)
		p->interleave_ways = save;
out:
	up_write(&cxl_region_rwsem);
	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(interleave_ways);

static ssize_t interleave_granularity_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
	up_read(&cxl_region_rwsem);

	return rc;
}

static ssize_t interleave_granularity_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	int rc, val;
	u16 ig;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;

	rc = granularity_to_eig(val, &ig);
	if (rc)
		return rc;

	/*
	 * When the host-bridge is interleaved, disallow region granularity !=
	 * root granularity. Regions with a granularity less than the root
	 * interleave result in needing multiple endpoints to support a single
	 * slot in the interleave (possible to support in the future). Regions
	 * with a granularity greater than the root interleave result in invalid
	 * DPA translations (invalid to support).
	 */
	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
		return -EINVAL;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
		rc = -EBUSY;
		goto out;
	}

	p->interleave_granularity = val;
out:
	up_write(&cxl_region_rwsem);
	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(interleave_granularity);

static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	u64 resource = -1ULL;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->res)
		resource = p->res->start;
	rc = sysfs_emit(buf, "%#llx\n", resource);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(resource);

static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);

	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
}
static DEVICE_ATTR_RO(mode);

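/*
 * Allocate HPA (host physical address) capacity for the region out of the
 * root decoder's window. Note the SZ_256M constraints below: CXL decoder
 * programming operates at a 256M granularity, so the allocation is 256M
 * aligned and the size must be a 256M * interleave_ways multiple, hence
 * the div64_u64_rem() check.
 */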
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	struct cxl_region_params *p = &cxlr->params;
	struct resource *res;
	u64 remainder = 0;

	lockdep_assert_held_write(&cxl_region_rwsem);

	/* Nothing to do... */
	if (p->res && resource_size(p->res) == size)
		return 0;

	/* To change size the old size must be freed first */
	if (p->res)
		return -EBUSY;

	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
		return -EBUSY;

	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
	if (!p->interleave_ways || !p->interleave_granularity ||
	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
		return -ENXIO;

	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
	if (remainder)
		return -EINVAL;

	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
				    dev_name(&cxlr->dev));
	if (IS_ERR(res)) {
		dev_dbg(&cxlr->dev,
			"HPA allocation error (%ld) for size:%pap in %s %pr\n",
			PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
		return PTR_ERR(res);
	}

	p->res = res;
	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;

	return 0;
}

static void cxl_region_iomem_release(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;

	if (device_is_registered(&cxlr->dev))
		lockdep_assert_held_write(&cxl_region_rwsem);
	if (p->res) {
		/*
		 * Autodiscovered regions may not have been able to insert their
		 * resource.
		 */
		if (p->res->parent)
			remove_resource(p->res);
		kfree(p->res);
		p->res = NULL;
	}
}

static int free_hpa(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;

	lockdep_assert_held_write(&cxl_region_rwsem);

	if (!p->res)
		return 0;

	if (p->state >= CXL_CONFIG_ACTIVE)
		return -EBUSY;

	cxl_region_iomem_release(cxlr);
	p->state = CXL_CONFIG_IDLE;
	return 0;
}

static ssize_t size_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	u64 val;
	int rc;

	rc = kstrtou64(buf, 0, &val);
	if (rc)
		return rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (val)
		rc = alloc_hpa(cxlr, val);
	else
		rc = free_hpa(cxlr);
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;

	return len;
}

static ssize_t size_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	u64 size = 0;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->res)
		size = resource_size(p->res);
	rc = sysfs_emit(buf, "%#llx\n", size);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(size);

static struct attribute *cxl_region_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_commit.attr,
	&dev_attr_interleave_ways.attr,
	&dev_attr_interleave_granularity.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_mode.attr,
	NULL,
};

static const struct attribute_group cxl_region_group = {
	.attrs = cxl_region_attrs,
	.is_visible = cxl_region_visible,
};

static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	int rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (pos >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
			p->interleave_ways);
		rc = -ENXIO;
		goto out;
	}

	cxled = p->targets[pos];
	if (!cxled)
		rc = sysfs_emit(buf, "\n");
	else
		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
out:
	up_read(&cxl_region_rwsem);

	return rc;
}

static int match_free_decoder(struct device *dev, void *data)
{
	struct cxl_decoder *cxld;
	int *id = data;

	if (!is_switch_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	/* enforce ordered allocation */
	if (cxld->id != *id)
		return 0;

	if (!cxld->region)
		return 1;

	(*id)++;

	return 0;
}

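/*
 * For auto-discovered regions the decoders are already programmed, so
 * rather than taking the next free decoder, match the switch decoder
 * whose HPA range exactly matches the region's resource.
 */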
static int match_auto_decoder(struct device *dev, void *data)
{
	struct cxl_region_params *p = data;
	struct cxl_decoder *cxld;
	struct range *r;

	if (!is_switch_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);
	r = &cxld->hpa_range;

	if (p->res && p->res->start == r->start && p->res->end == r->end)
		return 1;

	return 0;
}

static struct cxl_decoder *
cxl_region_find_decoder(struct cxl_port *port,
			struct cxl_endpoint_decoder *cxled,
			struct cxl_region *cxlr)
{
	struct device *dev;
	int id = 0;

	if (port == cxled_to_port(cxled))
		return &cxled->cxld;

	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
		dev = device_find_child(&port->dev, &cxlr->params,
					match_auto_decoder);
	else
		dev = device_find_child(&port->dev, &id, match_free_decoder);
	if (!dev)
		return NULL;
	/*
	 * This decoder is pinned (stays registered) as long as the endpoint
	 * decoder is registered, and endpoint decoder unregistration holds
	 * the cxl_region_rwsem over unregister events, so there is no need
	 * to hold on to this extra reference.
	 */
	put_device(dev);
	return to_cxl_decoder(dev);
}

static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
			  struct cxl_decoder *cxld)
{
	struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
	struct cxl_decoder *cxld_iter = rr->decoder;

	/*
	 * Allow the out of order assembly of auto-discovered regions.
	 * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
	 * in HPA order. Confirm that the decoder with the lesser HPA
	 * starting address has the lesser id.
	 */
	dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
		dev_name(&cxld->dev), cxld->id,
		dev_name(&cxld_iter->dev), cxld_iter->id);

	if (cxld_iter->id > cxld->id)
		return true;

	return false;
}

static struct cxl_region_ref *
alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
		 struct cxl_endpoint_decoder *cxled)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_region_ref *cxl_rr, *iter;
	unsigned long index;
	int rc;

	xa_for_each(&port->regions, index, iter) {
		struct cxl_region_params *ip = &iter->region->params;

		if (!ip->res || ip->res->start < p->res->start)
			continue;

		if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
			struct cxl_decoder *cxld;

			cxld = cxl_region_find_decoder(port, cxled, cxlr);
			if (auto_order_ok(port, iter->region, cxld))
				continue;
		}
		dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
			dev_name(&port->dev),
			dev_name(&iter->region->dev), ip->res, p->res);

		return ERR_PTR(-EBUSY);
	}

	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
	if (!cxl_rr)
		return ERR_PTR(-ENOMEM);
	cxl_rr->port = port;
	cxl_rr->region = cxlr;
	cxl_rr->nr_targets = 1;
	xa_init(&cxl_rr->endpoints);

	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: failed to track region reference: %d\n",
			dev_name(&port->dev), rc);
		kfree(cxl_rr);
		return ERR_PTR(rc);
	}

	return cxl_rr;
}

static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
{
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_decoder *cxld = cxl_rr->decoder;

	if (!cxld)
		return;

	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
	if (cxld->region == cxlr) {
		cxld->region = NULL;
		put_device(&cxlr->dev);
	}
}

static void free_region_ref(struct cxl_region_ref *cxl_rr)
{
	struct cxl_port *port = cxl_rr->port;
	struct cxl_region *cxlr = cxl_rr->region;

	cxl_rr_free_decoder(cxl_rr);
	xa_erase(&port->regions, (unsigned long)cxlr);
	xa_destroy(&cxl_rr->endpoints);
	kfree(cxl_rr);
}

static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
			 struct cxl_endpoint_decoder *cxled)
{
	int rc;
	struct cxl_port *port = cxl_rr->port;
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_decoder *cxld = cxl_rr->decoder;
	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));

	if (ep) {
		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
			       GFP_KERNEL);
		if (rc)
			return rc;
	}
	cxl_rr->nr_eps++;

	if (!cxld->region) {
		cxld->region = cxlr;
		get_device(&cxlr->dev);
	}

	return 0;
}

static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
				struct cxl_endpoint_decoder *cxled,
				struct cxl_region_ref *cxl_rr)
{
	struct cxl_decoder *cxld;

	cxld = cxl_region_find_decoder(port, cxled, cxlr);
	if (!cxld) {
		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
			dev_name(&port->dev));
		return -EBUSY;
	}

	if (cxld->region) {
		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
			dev_name(&port->dev), dev_name(&cxld->dev),
			dev_name(&cxld->region->dev));
		return -EBUSY;
	}

	/*
	 * Endpoints should already match the region type, but backstop that
	 * assumption with an assertion. Switch-decoders change mapping-type
	 * based on what is mapped when they are assigned to a region.
	 */
	dev_WARN_ONCE(&cxlr->dev,
		      port == cxled_to_port(cxled) &&
			      cxld->target_type != cxlr->type,
		      "%s:%s mismatch decoder type %d -> %d\n",
		      dev_name(&cxled_to_memdev(cxled)->dev),
		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
	cxld->target_type = cxlr->type;
	cxl_rr->decoder = cxld;
	return 0;
}

/**
 * cxl_port_attach_region() - track a region's interest in a port by endpoint
 * @port: port to add a new region reference 'struct cxl_region_ref'
 * @cxlr: region to attach to @port
 * @cxled: endpoint decoder used to create or further pin a region reference
 * @pos: interleave position of @cxled in @cxlr
 *
 * The attach event is an opportunity to validate CXL decode setup
 * constraints and record metadata needed for programming HDM decoders,
 * in particular decoder target lists.
 *
 * The steps are:
 *
 * - validate that there are no other regions with a higher HPA already
 *   associated with @port
 * - establish a region reference if one is not already present
 *
 *   - additionally allocate a decoder instance that will host @cxlr on
 *     @port
 *
 * - pin the region reference by the endpoint
 * - account for how many entries in @port's target list are needed to
 *   cover all of the added endpoints.
 */
static int cxl_port_attach_region(struct cxl_port *port,
				  struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
	struct cxl_region_ref *cxl_rr;
	bool nr_targets_inc = false;
	struct cxl_decoder *cxld;
	unsigned long index;
	int rc = -EBUSY;

	lockdep_assert_held_write(&cxl_region_rwsem);

	cxl_rr = cxl_rr_load(port, cxlr);
	if (cxl_rr) {
		struct cxl_ep *ep_iter;
		int found = 0;

		/*
		 * Walk the existing endpoints that have been attached to
		 * @cxlr at @port and see if they share the same 'next' port
		 * in the downstream direction. I.e. endpoints that share common
		 * upstream switch.
		 */
		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
			if (ep_iter == ep)
				continue;
			if (ep_iter->next == ep->next) {
				found++;
				break;
			}
		}

		/*
		 * New target port, or @port is an endpoint port that always
		 * accounts its own local decode as a target.
		 */
		if (!found || !ep->next) {
			cxl_rr->nr_targets++;
			nr_targets_inc = true;
		}
	} else {
		cxl_rr = alloc_region_ref(port, cxlr, cxled);
		if (IS_ERR(cxl_rr)) {
			dev_dbg(&cxlr->dev,
				"%s: failed to allocate region reference\n",
				dev_name(&port->dev));
			return PTR_ERR(cxl_rr);
		}
		nr_targets_inc = true;

		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
		if (rc)
			goto out_erase;
	}
	cxld = cxl_rr->decoder;

	rc = cxl_rr_ep_add(cxl_rr, cxled);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: failed to track endpoint %s:%s reference\n",
			dev_name(&port->dev), dev_name(&cxlmd->dev),
			dev_name(&cxld->dev));
		goto out_erase;
	}

	dev_dbg(&cxlr->dev,
		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
		dev_name(port->uport_dev), dev_name(&port->dev),
		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
		dev_name(&cxled->cxld.dev), pos,
		ep ? ep->next ? dev_name(ep->next->uport_dev) :
				dev_name(&cxlmd->dev) :
		     "none",
		cxl_rr->nr_eps, cxl_rr->nr_targets);

	return 0;
out_erase:
	if (nr_targets_inc)
		cxl_rr->nr_targets--;
	if (cxl_rr->nr_eps == 0)
		free_region_ref(cxl_rr);
	return rc;
}

static void cxl_port_detach_region(struct cxl_port *port,
				   struct cxl_region *cxlr,
				   struct cxl_endpoint_decoder *cxled)
{
	struct cxl_region_ref *cxl_rr;
	struct cxl_ep *ep = NULL;

	lockdep_assert_held_write(&cxl_region_rwsem);

	cxl_rr = cxl_rr_load(port, cxlr);
	if (!cxl_rr)
		return;

	/*
	 * Endpoint ports do not carry cxl_ep references, and they
	 * never target more than one endpoint by definition
	 */
	if (cxl_rr->decoder == &cxled->cxld)
		cxl_rr->nr_eps--;
	else
		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
	if (ep) {
		struct cxl_ep *ep_iter;
		unsigned long index;
		int found = 0;

		cxl_rr->nr_eps--;
		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
			if (ep_iter->next == ep->next) {
				found++;
				break;
			}
		}
		if (!found)
			cxl_rr->nr_targets--;
	}

	if (cxl_rr->nr_eps == 0)
		free_region_ref(cxl_rr);
}

static int check_last_peer(struct cxl_endpoint_decoder *cxled,
			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
			   int distance)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled_peer;
	struct cxl_port *port = cxl_rr->port;
	struct cxl_memdev *cxlmd_peer;
	struct cxl_ep *ep_peer;
	int pos = cxled->pos;

	/*
	 * If this position wants to share a dport with the last endpoint mapped
	 * then that endpoint, at index 'position - distance', must also be
	 * mapped by this dport.
	 */
	if (pos < distance) {
		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
		return -ENXIO;
	}
	cxled_peer = p->targets[pos - distance];
	cxlmd_peer = cxled_to_memdev(cxled_peer);
	ep_peer = cxl_ep_load(port, cxlmd_peer);
	if (ep->dport != ep_peer->dport) {
		dev_dbg(&cxlr->dev,
			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
			dev_name(&cxlmd_peer->dev),
			dev_name(&cxled_peer->cxld.dev));
		return -ENXIO;
	}

	return 0;
}

static int cxl_port_setup_targets(struct cxl_port *port,
				  struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_decoder *cxld = cxl_rr->decoder;
	struct cxl_switch_decoder *cxlsd;
	u16 eig, peig;
	u8 eiw, peiw;

	/*
	 * While root level decoders support x3, x6, x12, switch level
	 * decoders only support powers of 2 up to x16.
	 */
	if (!is_power_of_2(cxl_rr->nr_targets)) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			cxl_rr->nr_targets);
		return -EINVAL;
	}

	cxlsd = to_cxl_switch_decoder(&cxld->dev);
	if (cxl_rr->nr_targets_set) {
		int i, distance;

		/*
		 * Passthrough decoders impose no distance requirements between
		 * peers
		 */
		if (cxl_rr->nr_targets == 1)
			distance = 0;
		else
			distance = p->nr_targets / cxl_rr->nr_targets;
		for (i = 0; i < cxl_rr->nr_targets_set; i++)
			if (ep->dport == cxlsd->target[i]) {
				rc = check_last_peer(cxled, ep, cxl_rr,
						     distance);
				if (rc)
					return rc;
				goto out_target_set;
			}
		goto add_target;
	}

	if (is_cxl_root(parent_port)) {
		/*
		 * Root decoder IG is always set to value in CFMWS which
		 * may be different than this region's IG. We can use the
		 * region's IG here since interleave_granularity_store()
		 * does not allow interleaved host-bridges with
		 * root IG != region IG.
		 */
		parent_ig = p->interleave_granularity;
		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
		/*
		 * For purposes of address bit routing, use power-of-2 math for
		 * switch ports.
		 */
		if (!is_power_of_2(parent_iw))
			parent_iw /= 3;
	} else {
		struct cxl_region_ref *parent_rr;
		struct cxl_decoder *parent_cxld;

		parent_rr = cxl_rr_load(parent_port, cxlr);
		parent_cxld = parent_rr->decoder;
		parent_ig = parent_cxld->interleave_granularity;
		parent_iw = parent_cxld->interleave_ways;
	}

	rc = granularity_to_eig(parent_ig, &peig);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
			dev_name(parent_port->uport_dev),
			dev_name(&parent_port->dev), parent_ig);
		return rc;
	}

	rc = ways_to_eiw(parent_iw, &peiw);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
			dev_name(parent_port->uport_dev),
			dev_name(&parent_port->dev), parent_iw);
		return rc;
	}

	iw = cxl_rr->nr_targets;
	rc = ways_to_eiw(iw, &eiw);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev), iw);
		return rc;
	}

	/*
	 * Interleave granularity is a multiple of @parent_port granularity.
	 * Multiplier is the parent port interleave ways.
	 */
	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: invalid granularity calculation (%d * %d)\n",
			dev_name(&parent_port->dev), parent_ig, parent_iw);
		return rc;
	}

	rc = eig_to_granularity(eig, &ig);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			256 << eig);
		return rc;
	}
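
	/*
	 * Illustrative example: a parent interleaved 2 ways at 256B
	 * granularity yields ig = 512 here (parent_ig * parent_iw), since
	 * this port only receives every other 256B chunk and must select
	 * among its own targets using the next higher address bit.
	 */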
1332 | |
1333 | if (iw > 8 || iw > cxlsd->nr_targets) { |
1334 | dev_dbg(&cxlr->dev, |
1335 | "%s:%s:%s: ways: %d overflows targets: %d\n", |
1336 | dev_name(port->uport_dev), dev_name(&port->dev), |
1337 | dev_name(&cxld->dev), iw, cxlsd->nr_targets); |
1338 | return -ENXIO; |
1339 | } |
1340 | |
1341 | if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { |
1342 | if (cxld->interleave_ways != iw || |
1343 | cxld->interleave_granularity != ig || |
1344 | cxld->hpa_range.start != p->res->start || |
1345 | cxld->hpa_range.end != p->res->end || |
1346 | ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { |
1347 | dev_err(&cxlr->dev, |
1348 | "%s:%s %s expected iw: %d ig: %d %pr\n", |
1349 | dev_name(port->uport_dev), dev_name(&port->dev), |
1350 | __func__, iw, ig, p->res); |
1351 | dev_err(&cxlr->dev, |
1352 | "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n", |
1353 | dev_name(port->uport_dev), dev_name(&port->dev), |
1354 | __func__, cxld->interleave_ways, |
1355 | cxld->interleave_granularity, |
1356 | (cxld->flags & CXL_DECODER_F_ENABLE) ? |
1357 | "enabled": |
1358 | "disabled", |
1359 | cxld->hpa_range.start, cxld->hpa_range.end); |
1360 | return -ENXIO; |
1361 | } |
1362 | } else { |
1363 | cxld->interleave_ways = iw; |
1364 | cxld->interleave_granularity = ig; |
1365 | cxld->hpa_range = (struct range) { |
1366 | .start = p->res->start, |
1367 | .end = p->res->end, |
1368 | }; |
1369 | } |
1370 | dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev), |
1371 | dev_name(&port->dev), iw, ig); |
1372 | add_target: |
1373 | if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) { |
1374 | dev_dbg(&cxlr->dev, |
1375 | "%s:%s: targets full trying to add %s:%s at %d\n", |
1376 | dev_name(port->uport_dev), dev_name(&port->dev), |
1377 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); |
1378 | return -ENXIO; |
1379 | } |
1380 | if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { |
1381 | if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) { |
1382 | dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n", |
1383 | dev_name(port->uport_dev), dev_name(&port->dev), |
1384 | dev_name(&cxlsd->cxld.dev), |
1385 | dev_name(ep->dport->dport_dev), |
1386 | cxl_rr->nr_targets_set); |
1387 | return -ENXIO; |
1388 | } |
1389 | } else |
1390 | cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; |
1391 | inc = 1; |
1392 | out_target_set: |
1393 | cxl_rr->nr_targets_set += inc; |
1394 | dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n", |
1395 | dev_name(port->uport_dev), dev_name(&port->dev), |
1396 | cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev), |
1397 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); |
1398 | |
1399 | return 0; |
1400 | } |
1401 | |
1402 | static void cxl_port_reset_targets(struct cxl_port *port, |
1403 | struct cxl_region *cxlr) |
1404 | { |
1405 | struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); |
1406 | struct cxl_decoder *cxld; |
1407 | |
1408 | /* |
1409 | * After the last endpoint has been detached the entire cxl_rr may now |
1410 | * be gone. |
1411 | */ |
1412 | if (!cxl_rr) |
1413 | return; |
1414 | cxl_rr->nr_targets_set = 0; |
1415 | |
1416 | cxld = cxl_rr->decoder; |
1417 | cxld->hpa_range = (struct range) { |
1418 | .start = 0, |
1419 | .end = -1, |
1420 | }; |
1421 | } |
1422 | |
1423 | static void cxl_region_teardown_targets(struct cxl_region *cxlr) |
1424 | { |
1425 | struct cxl_region_params *p = &cxlr->params; |
1426 | struct cxl_endpoint_decoder *cxled; |
1427 | struct cxl_dev_state *cxlds; |
1428 | struct cxl_memdev *cxlmd; |
1429 | struct cxl_port *iter; |
1430 | struct cxl_ep *ep; |
1431 | int i; |
1432 | |
1433 | /* |
1434 | * In the auto-discovery case skip automatic teardown since the |
1435 | * address space is already active |
1436 | */ |
1437 | if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) |
1438 | return; |
1439 | |
1440 | for (i = 0; i < p->nr_targets; i++) { |
1441 | cxled = p->targets[i]; |
1442 | cxlmd = cxled_to_memdev(cxled); |
1443 | cxlds = cxlmd->cxlds; |
1444 | |
1445 | if (cxlds->rcd) |
1446 | continue; |
1447 | |
1448 | iter = cxled_to_port(cxled); |
1449 | while (!is_cxl_root(port: to_cxl_port(dev: iter->dev.parent))) |
1450 | iter = to_cxl_port(dev: iter->dev.parent); |
1451 | |
1452 | for (ep = cxl_ep_load(port: iter, cxlmd); iter; |
1453 | iter = ep->next, ep = cxl_ep_load(port: iter, cxlmd)) |
1454 | cxl_port_reset_targets(port: iter, cxlr); |
1455 | } |
1456 | } |
1457 | |
1458 | static int cxl_region_setup_targets(struct cxl_region *cxlr) |
1459 | { |
1460 | struct cxl_region_params *p = &cxlr->params; |
1461 | struct cxl_endpoint_decoder *cxled; |
1462 | struct cxl_dev_state *cxlds; |
1463 | int i, rc, rch = 0, vh = 0; |
1464 | struct cxl_memdev *cxlmd; |
1465 | struct cxl_port *iter; |
1466 | struct cxl_ep *ep; |
1467 | |
1468 | for (i = 0; i < p->nr_targets; i++) { |
1469 | cxled = p->targets[i]; |
1470 | cxlmd = cxled_to_memdev(cxled); |
1471 | cxlds = cxlmd->cxlds; |
1472 | |
1473 | /* validate that all targets agree on topology */ |
1474 | if (!cxlds->rcd) { |
1475 | vh++; |
1476 | } else { |
1477 | rch++; |
1478 | continue; |
1479 | } |
1480 | |
1481 | iter = cxled_to_port(cxled); |
1482 | while (!is_cxl_root(port: to_cxl_port(dev: iter->dev.parent))) |
1483 | iter = to_cxl_port(dev: iter->dev.parent); |
1484 | |
1485 | /* |
1486 | * Descend the topology tree programming / validating |
1487 | * targets while looking for conflicts. |
1488 | */ |
1489 | for (ep = cxl_ep_load(port: iter, cxlmd); iter; |
1490 | iter = ep->next, ep = cxl_ep_load(port: iter, cxlmd)) { |
1491 | rc = cxl_port_setup_targets(port: iter, cxlr, cxled); |
1492 | if (rc) { |
1493 | cxl_region_teardown_targets(cxlr); |
1494 | return rc; |
1495 | } |
1496 | } |
1497 | } |
1498 | |
1499 | if (rch && vh) { |
1500 | dev_err(&cxlr->dev, "mismatched CXL topologies detected\n"); |
1501 | cxl_region_teardown_targets(cxlr); |
1502 | return -ENXIO; |
1503 | } |
1504 | |
1505 | return 0; |
1506 | } |
1507 | |
1508 | static int cxl_region_validate_position(struct cxl_region *cxlr, |
1509 | struct cxl_endpoint_decoder *cxled, |
1510 | int pos) |
1511 | { |
1512 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
1513 | struct cxl_region_params *p = &cxlr->params; |
1514 | int i; |
1515 | |
1516 | if (pos < 0 || pos >= p->interleave_ways) { |
1517 | dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, |
1518 | p->interleave_ways); |
1519 | return -ENXIO; |
1520 | } |
1521 | |
1522 | if (p->targets[pos] == cxled) |
1523 | return 0; |
1524 | |
1525 | if (p->targets[pos]) { |
1526 | struct cxl_endpoint_decoder *cxled_target = p->targets[pos]; |
1527 | struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled: cxled_target); |
1528 | |
1529 | dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n", |
1530 | pos, dev_name(&cxlmd_target->dev), |
1531 | dev_name(&cxled_target->cxld.dev)); |
1532 | return -EBUSY; |
1533 | } |
1534 | |
1535 | for (i = 0; i < p->interleave_ways; i++) { |
1536 | struct cxl_endpoint_decoder *cxled_target; |
1537 | struct cxl_memdev *cxlmd_target; |
1538 | |
1539 | cxled_target = p->targets[i]; |
1540 | if (!cxled_target) |
1541 | continue; |
1542 | |
1543 | cxlmd_target = cxled_to_memdev(cxled: cxled_target); |
1544 | if (cxlmd_target == cxlmd) { |
1545 | dev_dbg(&cxlr->dev, |
1546 | "%s already specified at position %d via: %s\n", |
1547 | dev_name(&cxlmd->dev), pos, |
1548 | dev_name(&cxled_target->cxld.dev)); |
1549 | return -EBUSY; |
1550 | } |
1551 | } |
1552 | |
1553 | return 0; |
1554 | } |
1555 | |
1556 | static int cxl_region_attach_position(struct cxl_region *cxlr, |
1557 | struct cxl_root_decoder *cxlrd, |
1558 | struct cxl_endpoint_decoder *cxled, |
1559 | const struct cxl_dport *dport, int pos) |
1560 | { |
1561 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
1562 | struct cxl_port *iter; |
1563 | int rc; |
1564 | |
1565 | if (cxlrd->calc_hb(cxlrd, pos) != dport) { |
1566 | dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", |
1567 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
1568 | dev_name(&cxlrd->cxlsd.cxld.dev)); |
1569 | return -ENXIO; |
1570 | } |
1571 | |
1572 | for (iter = cxled_to_port(cxled); !is_cxl_root(port: iter); |
1573 | iter = to_cxl_port(dev: iter->dev.parent)) { |
1574 | rc = cxl_port_attach_region(port: iter, cxlr, cxled, pos); |
1575 | if (rc) |
1576 | goto err; |
1577 | } |
1578 | |
1579 | return 0; |
1580 | |
1581 | err: |
1582 | for (iter = cxled_to_port(cxled); !is_cxl_root(port: iter); |
1583 | iter = to_cxl_port(dev: iter->dev.parent)) |
1584 | cxl_port_detach_region(port: iter, cxlr, cxled); |
1585 | return rc; |
1586 | } |
1587 | |
1588 | static int cxl_region_attach_auto(struct cxl_region *cxlr, |
1589 | struct cxl_endpoint_decoder *cxled, int pos) |
1590 | { |
1591 | struct cxl_region_params *p = &cxlr->params; |
1592 | |
1593 | if (cxled->state != CXL_DECODER_STATE_AUTO) { |
1594 | dev_err(&cxlr->dev, |
1595 | "%s: unable to add decoder to autodetected region\n", |
1596 | dev_name(&cxled->cxld.dev)); |
1597 | return -EINVAL; |
1598 | } |
1599 | |
1600 | if (pos >= 0) { |
1601 | dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n", |
1602 | dev_name(&cxled->cxld.dev), pos); |
1603 | return -EINVAL; |
1604 | } |
1605 | |
1606 | if (p->nr_targets >= p->interleave_ways) { |
1607 | dev_err(&cxlr->dev, "%s: no more target slots available\n", |
1608 | dev_name(&cxled->cxld.dev)); |
1609 | return -ENXIO; |
1610 | } |
1611 | |
1612 | /* |
1613 | * Temporarily record the endpoint decoder into the target array. Yes, |
1614 | * this means that userspace can view devices in the wrong position |
1615 | * before the region activates, and must be careful to understand when |
1616 | * it might be racing region autodiscovery. |
1617 | */ |
1618 | pos = p->nr_targets; |
1619 | p->targets[pos] = cxled; |
1620 | cxled->pos = pos; |
1621 | p->nr_targets++; |
1622 | |
1623 | return 0; |
1624 | } |
1625 | |
1626 | static int cmp_interleave_pos(const void *a, const void *b) |
1627 | { |
1628 | struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; |
1629 | struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; |
1630 | |
1631 | return cxled_a->pos - cxled_b->pos; |
1632 | } |
1633 | |
1634 | static struct cxl_port *next_port(struct cxl_port *port) |
1635 | { |
1636 | if (!port->parent_dport) |
1637 | return NULL; |
1638 | return port->parent_dport->port; |
1639 | } |
1640 | |
1641 | static int match_switch_decoder_by_range(struct device *dev, void *data) |
1642 | { |
1643 | struct cxl_switch_decoder *cxlsd; |
1644 | struct range *r1, *r2 = data; |
1645 | |
1646 | if (!is_switch_decoder(dev)) |
1647 | return 0; |
1648 | |
1649 | cxlsd = to_cxl_switch_decoder(dev); |
1650 | r1 = &cxlsd->cxld.hpa_range; |
1651 | |
1652 | if (is_root_decoder(dev)) |
1653 | return range_contains(r1, r2); |
1654 | return (r1->start == r2->start && r1->end == r2->end); |
1655 | } |
1656 | |
1657 | static int find_pos_and_ways(struct cxl_port *port, struct range *range, |
1658 | int *pos, int *ways) |
1659 | { |
1660 | struct cxl_switch_decoder *cxlsd; |
1661 | struct cxl_port *parent; |
1662 | struct device *dev; |
1663 | int rc = -ENXIO; |
1664 | |
1665 | parent = next_port(port); |
1666 | if (!parent) |
1667 | return rc; |
1668 | |
1669 | dev = device_find_child(dev: &parent->dev, data: range, |
1670 | match: match_switch_decoder_by_range); |
1671 | if (!dev) { |
1672 | dev_err(port->uport_dev, |
1673 | "failed to find decoder mapping %#llx-%#llx\n", |
1674 | range->start, range->end); |
1675 | return rc; |
1676 | } |
1677 | cxlsd = to_cxl_switch_decoder(dev); |
1678 | *ways = cxlsd->cxld.interleave_ways; |
1679 | |
1680 | for (int i = 0; i < *ways; i++) { |
1681 | if (cxlsd->target[i] == port->parent_dport) { |
1682 | *pos = i; |
1683 | rc = 0; |
1684 | break; |
1685 | } |
1686 | } |
1687 | put_device(dev); |
1688 | |
1689 | return rc; |
1690 | } |
1691 | |
1692 | /** |
1693 | * cxl_calc_interleave_pos() - calculate an endpoint position in a region |
1694 | * @cxled: endpoint decoder member of given region |
1695 | * |
1696 | * The endpoint position is calculated by traversing the topology from |
1697 | * the endpoint to the root decoder and iteratively applying this |
1698 | * calculation: |
1699 | * |
1700 | * position = position * parent_ways + parent_pos; |
1701 | * |
1702 | * ...where @position is inferred from switch and root decoder target lists. |
1703 | * |
1704 | * Return: position >= 0 on success |
1705 | * -ENXIO on failure |
1706 | */ |
1707 | static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) |
1708 | { |
1709 | struct cxl_port *iter, *port = cxled_to_port(cxled); |
1710 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
1711 | struct range *range = &cxled->cxld.hpa_range; |
1712 | int parent_ways = 0, parent_pos = 0, pos = 0; |
1713 | int rc; |
1714 | |
1715 | /* |
1716 | * Example: the expected interleave order of the 4-way region shown |
1717 | * below is: mem0, mem2, mem1, mem3 |
1718 | * |
1719 | * root_port |
1720 | * / \ |
1721 | * host_bridge_0 host_bridge_1 |
1722 | * | | | | |
1723 | * mem0 mem1 mem2 mem3 |
1724 | * |
1725 | * In the example the calculator will iterate twice. The first iteration |
1726 | * uses the mem position in the host-bridge and the ways of the host- |
1727 | * bridge to generate the first, or local, position. The second |
1728 | * iteration uses the host-bridge position in the root_port and the ways |
1729 | * of the root_port to refine the position. |
1730 | * |
1731 | * A trace of the calculation per endpoint looks like this: |
1732 | * mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0 |
1733 | * pos = 0 * 2 + 0 pos = 0 * 2 + 1 |
1734 | * pos: 0 pos: 1 |
1735 | * |
1736 | * mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1 |
1737 | * pos = 1 * 2 + 0 pos = 1 * 2 + 1 |
1738 | * pos: 2 pos: 3
1739 | * |
1740 | * Note that while this example is simple, the method applies to more |
1741 | * complex topologies, including those with switches. |
1742 | */ |
1743 | |
1744 | /* Iterate from endpoint to root_port refining the position */ |
1745 | for (iter = port; iter; iter = next_port(iter)) {
1746 | if (is_cxl_root(iter))
1747 | break;
1748 | 
1749 | rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
1750 | if (rc) |
1751 | return rc; |
1752 | |
1753 | pos = pos * parent_ways + parent_pos; |
1754 | } |
1755 | |
1756 | dev_dbg(&cxlmd->dev, |
1757 | "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n", |
1758 | dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent), |
1759 | dev_name(&port->dev), range->start, range->end, pos); |
1760 | |
1761 | return pos; |
1762 | } |
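/*
 * Standalone sketch of the recurrence above, runnable in user space.
 * The (pos, ways) pairs per level are hypothetical stand-ins for what
 * find_pos_and_ways() derives from decoder target lists; for mem3 in
 * the example topology they are {1, 2} at the host bridge and {1, 2}
 * at the root port:
 *
 *	int pos = 0;
 *	const int level_pos[]  = { 1, 1 };	// endpoint towards root
 *	const int level_ways[] = { 2, 2 };
 *
 *	for (int i = 0; i < 2; i++)
 *		pos = pos * level_ways[i] + level_pos[i];
 *	// pos == 3, matching the mem3 trace above
 */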
1763 | |
1764 | static int cxl_region_sort_targets(struct cxl_region *cxlr) |
1765 | { |
1766 | struct cxl_region_params *p = &cxlr->params; |
1767 | int i, rc = 0; |
1768 | |
1769 | for (i = 0; i < p->nr_targets; i++) { |
1770 | struct cxl_endpoint_decoder *cxled = p->targets[i]; |
1771 | |
1772 | cxled->pos = cxl_calc_interleave_pos(cxled); |
1773 | /* |
1774 | * Record that sorting failed, but still continue to calc |
1775 | * cxled->pos so that follow-on code paths can reliably |
1776 | * do p->targets[cxled->pos] to self-reference their entry. |
1777 | */ |
1778 | if (cxled->pos < 0) |
1779 | rc = -ENXIO; |
1780 | } |
1781 | /* Keep the cxlr target list in interleave position order */ |
1782 | sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1783 | cmp_interleave_pos, NULL);
1784 | |
1785 | dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1786 | return rc; |
1787 | } |
1788 | |
1789 | static int cxl_region_attach(struct cxl_region *cxlr, |
1790 | struct cxl_endpoint_decoder *cxled, int pos) |
1791 | { |
1792 | struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1793 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
1794 | struct cxl_region_params *p = &cxlr->params; |
1795 | struct cxl_port *ep_port, *root_port; |
1796 | struct cxl_dport *dport; |
1797 | int rc = -ENXIO; |
1798 | |
1799 | if (cxled->mode != cxlr->mode) { |
1800 | dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", |
1801 | dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); |
1802 | return -EINVAL; |
1803 | } |
1804 | |
1805 | if (cxled->mode == CXL_DECODER_DEAD) { |
1806 | dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); |
1807 | return -ENODEV; |
1808 | } |
1809 | |
1810 | /* all full of members, or interleave config not established? */ |
1811 | if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { |
1812 | dev_dbg(&cxlr->dev, "region already active\n"); |
1813 | return -EBUSY; |
1814 | } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { |
1815 | dev_dbg(&cxlr->dev, "interleave config missing\n"); |
1816 | return -ENXIO; |
1817 | } |
1818 | |
1819 | if (p->nr_targets >= p->interleave_ways) { |
1820 | dev_dbg(&cxlr->dev, "region already has %d endpoints\n", |
1821 | p->nr_targets); |
1822 | return -EINVAL; |
1823 | } |
1824 | |
1825 | ep_port = cxled_to_port(cxled); |
1826 | root_port = cxlrd_to_port(cxlrd); |
1827 | dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1828 | if (!dport) { |
1829 | dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n", |
1830 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
1831 | dev_name(cxlr->dev.parent)); |
1832 | return -ENXIO; |
1833 | } |
1834 | |
1835 | if (cxled->cxld.target_type != cxlr->type) { |
1836 | dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n", |
1837 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
1838 | cxled->cxld.target_type, cxlr->type); |
1839 | return -ENXIO; |
1840 | } |
1841 | |
1842 | if (!cxled->dpa_res) { |
1843 | dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n", |
1844 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev)); |
1845 | return -ENXIO; |
1846 | } |
1847 | |
1848 | if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1849 | resource_size(p->res)) {
1850 | dev_dbg(&cxlr->dev, |
1851 | "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n", |
1852 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
1853 | (u64)resource_size(cxled->dpa_res), p->interleave_ways, |
1854 | (u64)resource_size(p->res)); |
1855 | return -EINVAL; |
1856 | } |
1857 | |
1858 | cxl_region_perf_data_calculate(cxlr, cxled); |
1859 | |
1860 | if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { |
1861 | int i; |
1862 | |
1863 | rc = cxl_region_attach_auto(cxlr, cxled, pos); |
1864 | if (rc) |
1865 | return rc; |
1866 | |
1867 | /* await more targets to arrive... */ |
1868 | if (p->nr_targets < p->interleave_ways) |
1869 | return 0; |
1870 | |
1871 | /* |
1872 | * All targets are here, which implies all PCI enumeration that |
1873 | * affects this region has been completed. Walk the topology to |
1874 | * sort the devices into their relative region decode position. |
1875 | */ |
1876 | rc = cxl_region_sort_targets(cxlr); |
1877 | if (rc) |
1878 | return rc; |
1879 | |
1880 | for (i = 0; i < p->nr_targets; i++) { |
1881 | cxled = p->targets[i]; |
1882 | ep_port = cxled_to_port(cxled); |
1883 | dport = cxl_find_dport_by_dev(root_port,
1884 | ep_port->host_bridge);
1885 | rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
1886 | dport, i);
1887 | if (rc) |
1888 | return rc; |
1889 | } |
1890 | |
1891 | rc = cxl_region_setup_targets(cxlr); |
1892 | if (rc) |
1893 | return rc; |
1894 | |
1895 | /* |
1896 | * If target setup succeeds in the autodiscovery case |
1897 | * then the region is already committed. |
1898 | */ |
1899 | p->state = CXL_CONFIG_COMMIT; |
1900 | |
1901 | return 0; |
1902 | } |
1903 | |
1904 | rc = cxl_region_validate_position(cxlr, cxled, pos); |
1905 | if (rc) |
1906 | return rc; |
1907 | |
1908 | rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos); |
1909 | if (rc) |
1910 | return rc; |
1911 | |
1912 | p->targets[pos] = cxled; |
1913 | cxled->pos = pos; |
1914 | p->nr_targets++; |
1915 | |
1916 | if (p->nr_targets == p->interleave_ways) { |
1917 | rc = cxl_region_setup_targets(cxlr); |
1918 | if (rc) |
1919 | return rc; |
1920 | p->state = CXL_CONFIG_ACTIVE; |
1921 | } |
1922 | |
1923 | cxled->cxld.interleave_ways = p->interleave_ways; |
1924 | cxled->cxld.interleave_granularity = p->interleave_granularity; |
1925 | cxled->cxld.hpa_range = (struct range) { |
1926 | .start = p->res->start, |
1927 | .end = p->res->end, |
1928 | }; |
1929 | |
1930 | if (p->nr_targets != p->interleave_ways) |
1931 | return 0; |
1932 | |
1933 | /* |
1934 | * Test the auto-discovery position calculator function |
1935 | * against this successfully created user-defined region. |
1936 | * A fail message here means that this interleave config |
1937 | * will fail when presented as CXL_REGION_F_AUTO. |
1938 | */ |
1939 | for (int i = 0; i < p->nr_targets; i++) { |
1940 | struct cxl_endpoint_decoder *cxled = p->targets[i]; |
1941 | int test_pos; |
1942 | |
1943 | test_pos = cxl_calc_interleave_pos(cxled); |
1944 | dev_dbg(&cxled->cxld.dev, |
1945 | "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n", |
1946 | (test_pos == cxled->pos) ? "success" : "fail",
1947 | test_pos, cxled->pos); |
1948 | } |
1949 | |
1950 | return 0; |
1951 | } |
1952 | |
1953 | static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) |
1954 | { |
1955 | struct cxl_port *iter, *ep_port = cxled_to_port(cxled); |
1956 | struct cxl_region *cxlr = cxled->cxld.region; |
1957 | struct cxl_region_params *p; |
1958 | int rc = 0; |
1959 | |
1960 | lockdep_assert_held_write(&cxl_region_rwsem); |
1961 | |
1962 | if (!cxlr) |
1963 | return 0; |
1964 | |
1965 | p = &cxlr->params; |
1966 | get_device(&cxlr->dev);
1967 | |
1968 | if (p->state > CXL_CONFIG_ACTIVE) { |
1969 | /* |
1970 | * TODO: tear down all impacted regions if a device is |
1971 | * removed out of order |
1972 | */ |
1973 | rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1974 | if (rc) |
1975 | goto out; |
1976 | p->state = CXL_CONFIG_ACTIVE; |
1977 | } |
1978 | |
1979 | for (iter = ep_port; !is_cxl_root(iter);
1980 | iter = to_cxl_port(iter->dev.parent))
1981 | cxl_port_detach_region(iter, cxlr, cxled);
1982 | |
1983 | if (cxled->pos < 0 || cxled->pos >= p->interleave_ways || |
1984 | p->targets[cxled->pos] != cxled) { |
1985 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
1986 | |
1987 | dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", |
1988 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
1989 | cxled->pos); |
1990 | goto out; |
1991 | } |
1992 | |
1993 | if (p->state == CXL_CONFIG_ACTIVE) { |
1994 | p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; |
1995 | cxl_region_teardown_targets(cxlr); |
1996 | } |
1997 | p->targets[cxled->pos] = NULL; |
1998 | p->nr_targets--; |
1999 | cxled->cxld.hpa_range = (struct range) { |
2000 | .start = 0, |
2001 | .end = -1, |
2002 | }; |
2003 | |
2004 | /* notify the region driver that one of its targets has departed */ |
2005 | up_write(&cxl_region_rwsem);
2006 | device_release_driver(&cxlr->dev);
2007 | down_write(&cxl_region_rwsem);
2008 | out:
2009 | put_device(&cxlr->dev);
2010 | return rc; |
2011 | } |
2012 | |
2013 | void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) |
2014 | { |
2015 | down_write(&cxl_region_rwsem);
2016 | cxled->mode = CXL_DECODER_DEAD;
2017 | cxl_region_detach(cxled);
2018 | up_write(&cxl_region_rwsem);
2019 | } |
2020 | |
2021 | static int attach_target(struct cxl_region *cxlr, |
2022 | struct cxl_endpoint_decoder *cxled, int pos, |
2023 | unsigned int state) |
2024 | { |
2025 | int rc = 0; |
2026 | |
2027 | if (state == TASK_INTERRUPTIBLE) |
2028 | rc = down_write_killable(&cxl_region_rwsem);
2029 | else
2030 | down_write(&cxl_region_rwsem);
2031 | if (rc) |
2032 | return rc; |
2033 | |
2034 | down_read(&cxl_dpa_rwsem);
2035 | rc = cxl_region_attach(cxlr, cxled, pos);
2036 | up_read(&cxl_dpa_rwsem);
2037 | up_write(&cxl_region_rwsem);
2038 | return rc; |
2039 | } |
2040 | |
2041 | static int detach_target(struct cxl_region *cxlr, int pos) |
2042 | { |
2043 | struct cxl_region_params *p = &cxlr->params; |
2044 | int rc; |
2045 | |
2046 | rc = down_write_killable(&cxl_region_rwsem);
2047 | if (rc) |
2048 | return rc; |
2049 | |
2050 | if (pos >= p->interleave_ways) { |
2051 | dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, |
2052 | p->interleave_ways); |
2053 | rc = -ENXIO; |
2054 | goto out; |
2055 | } |
2056 | |
2057 | if (!p->targets[pos]) { |
2058 | rc = 0; |
2059 | goto out; |
2060 | } |
2061 | |
2062 | rc = cxl_region_detach(p->targets[pos]);
2063 | out:
2064 | up_write(&cxl_region_rwsem);
2065 | return rc; |
2066 | } |
2067 | |
2068 | static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, |
2069 | size_t len) |
2070 | { |
2071 | int rc; |
2072 | |
2073 | if (sysfs_streq(buf, "\n"))
2074 | rc = detach_target(cxlr, pos); |
2075 | else { |
2076 | struct device *dev; |
2077 | |
2078 | dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2079 | if (!dev) |
2080 | return -ENODEV; |
2081 | |
2082 | if (!is_endpoint_decoder(dev)) { |
2083 | rc = -EINVAL; |
2084 | goto out; |
2085 | } |
2086 | |
2087 | rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2088 | TASK_INTERRUPTIBLE); |
2089 | out: |
2090 | put_device(dev); |
2091 | } |
2092 | |
2093 | if (rc < 0) |
2094 | return rc; |
2095 | return len; |
2096 | } |
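/*
 * User-space sketch of this ABI (device names are hypothetical for a
 * system with region0 and endpoint decoder "decoder3.0"): writing a
 * decoder name attaches it at position N, writing a newline detaches:
 *
 *	int fd = open("/sys/bus/cxl/devices/region0/target0", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "decoder3.0", strlen("decoder3.0"));	// attach
 *		write(fd, "\n", 1);				// detach
 *		close(fd);
 *	}
 */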
2097 | |
2098 | #define TARGET_ATTR_RW(n) \ |
2099 | static ssize_t target##n##_show( \ |
2100 | struct device *dev, struct device_attribute *attr, char *buf) \ |
2101 | { \ |
2102 | return show_targetN(to_cxl_region(dev), buf, (n)); \ |
2103 | } \ |
2104 | static ssize_t target##n##_store(struct device *dev, \ |
2105 | struct device_attribute *attr, \ |
2106 | const char *buf, size_t len) \ |
2107 | { \ |
2108 | return store_targetN(to_cxl_region(dev), buf, (n), len); \ |
2109 | } \ |
2110 | static DEVICE_ATTR_RW(target##n) |
2111 | |
2112 | TARGET_ATTR_RW(0); |
2113 | TARGET_ATTR_RW(1); |
2114 | TARGET_ATTR_RW(2); |
2115 | TARGET_ATTR_RW(3); |
2116 | TARGET_ATTR_RW(4); |
2117 | TARGET_ATTR_RW(5); |
2118 | TARGET_ATTR_RW(6); |
2119 | TARGET_ATTR_RW(7); |
2120 | TARGET_ATTR_RW(8); |
2121 | TARGET_ATTR_RW(9); |
2122 | TARGET_ATTR_RW(10); |
2123 | TARGET_ATTR_RW(11); |
2124 | TARGET_ATTR_RW(12); |
2125 | TARGET_ATTR_RW(13); |
2126 | TARGET_ATTR_RW(14); |
2127 | TARGET_ATTR_RW(15); |
2128 | |
2129 | static struct attribute *target_attrs[] = { |
2130 | &dev_attr_target0.attr, |
2131 | &dev_attr_target1.attr, |
2132 | &dev_attr_target2.attr, |
2133 | &dev_attr_target3.attr, |
2134 | &dev_attr_target4.attr, |
2135 | &dev_attr_target5.attr, |
2136 | &dev_attr_target6.attr, |
2137 | &dev_attr_target7.attr, |
2138 | &dev_attr_target8.attr, |
2139 | &dev_attr_target9.attr, |
2140 | &dev_attr_target10.attr, |
2141 | &dev_attr_target11.attr, |
2142 | &dev_attr_target12.attr, |
2143 | &dev_attr_target13.attr, |
2144 | &dev_attr_target14.attr, |
2145 | &dev_attr_target15.attr, |
2146 | NULL, |
2147 | }; |
2148 | |
2149 | static umode_t cxl_region_target_visible(struct kobject *kobj, |
2150 | struct attribute *a, int n) |
2151 | { |
2152 | struct device *dev = kobj_to_dev(kobj); |
2153 | struct cxl_region *cxlr = to_cxl_region(dev); |
2154 | struct cxl_region_params *p = &cxlr->params; |
2155 | |
2156 | if (n < p->interleave_ways) |
2157 | return a->mode; |
2158 | return 0; |
2159 | } |
2160 | |
2161 | static const struct attribute_group cxl_region_target_group = { |
2162 | .attrs = target_attrs, |
2163 | .is_visible = cxl_region_target_visible, |
2164 | }; |
2165 | |
2166 | static const struct attribute_group *get_cxl_region_target_group(void) |
2167 | { |
2168 | return &cxl_region_target_group; |
2169 | } |
2170 | |
2171 | static const struct attribute_group *region_groups[] = { |
2172 | &cxl_base_attribute_group, |
2173 | &cxl_region_group, |
2174 | &cxl_region_target_group, |
2175 | &cxl_region_access0_coordinate_group, |
2176 | &cxl_region_access1_coordinate_group, |
2177 | NULL, |
2178 | }; |
2179 | |
2180 | static void cxl_region_release(struct device *dev) |
2181 | { |
2182 | struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
2183 | struct cxl_region *cxlr = to_cxl_region(dev);
2184 | int id = atomic_read(&cxlrd->region_id);
2185 | |
2186 | /* |
2187 | * Try to reuse the recently idled id rather than the cached |
2188 | * next id to prevent the region id space from increasing |
2189 | * unnecessarily. |
2190 | */ |
2191 | if (cxlr->id < id) |
2192 | if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2193 | memregion_free(id);
2194 | goto out;
2195 | }
2196 | 
2197 | memregion_free(cxlr->id);
2198 | out:
2199 | put_device(dev->parent);
2200 | kfree(cxlr);
2201 | } |
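/*
 * Worked example of the id reuse above (assumed ids): with regions
 * 0..2 allocated, cxlrd->region_id caches 3 as the next id. Releasing
 * region1 (cxlr->id == 1 < 3) swings the cached next id back to 1 via
 * atomic_try_cmpxchg() and frees id 3 instead, so the next region
 * created under this root decoder reuses the name "region1" rather
 * than minting "region3".
 */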
2202 | |
2203 | const struct device_type cxl_region_type = { |
2204 | .name = "cxl_region", |
2205 | .release = cxl_region_release, |
2206 | .groups = region_groups |
2207 | }; |
2208 | |
2209 | bool is_cxl_region(struct device *dev) |
2210 | { |
2211 | return dev->type == &cxl_region_type; |
2212 | } |
2213 | EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL); |
2214 | |
2215 | static struct cxl_region *to_cxl_region(struct device *dev) |
2216 | { |
2217 | if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type, |
2218 | "not a cxl_region device\n")) |
2219 | return NULL; |
2220 | |
2221 | return container_of(dev, struct cxl_region, dev); |
2222 | } |
2223 | |
2224 | static void unregister_region(void *_cxlr) |
2225 | { |
2226 | struct cxl_region *cxlr = _cxlr; |
2227 | struct cxl_region_params *p = &cxlr->params; |
2228 | int i; |
2229 | |
2230 | unregister_memory_notifier(&cxlr->memory_notifier);
2231 | device_del(&cxlr->dev);
2232 | |
2233 | /* |
2234 | * Now that region sysfs is shut down, the parameter block is
2235 | * read-only, so there is no need to hold the region rwsem to access the
2236 | * region parameters. |
2237 | */ |
2238 | for (i = 0; i < p->interleave_ways; i++) |
2239 | detach_target(cxlr, i);
2240 | |
2241 | cxl_region_iomem_release(cxlr); |
2242 | put_device(&cxlr->dev);
2243 | } |
2244 | |
2245 | static struct lock_class_key cxl_region_key; |
2246 | |
2247 | static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id) |
2248 | { |
2249 | struct cxl_region *cxlr; |
2250 | struct device *dev; |
2251 | |
2252 | cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
2253 | if (!cxlr) {
2254 | memregion_free(id);
2255 | return ERR_PTR(-ENOMEM);
2256 | } |
2257 | |
2258 | dev = &cxlr->dev; |
2259 | device_initialize(dev); |
2260 | lockdep_set_class(&dev->mutex, &cxl_region_key); |
2261 | dev->parent = &cxlrd->cxlsd.cxld.dev; |
2262 | /* |
2263 | * Keep root decoder pinned through cxl_region_release to fixup |
2264 | * region id allocations |
2265 | */ |
2266 | get_device(dev->parent);
2267 | device_set_pm_not_required(dev); |
2268 | dev->bus = &cxl_bus_type; |
2269 | dev->type = &cxl_region_type; |
2270 | cxlr->id = id; |
2271 | |
2272 | return cxlr; |
2273 | } |
2274 | |
2275 | static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) |
2276 | { |
2277 | int cset = 0; |
2278 | int rc; |
2279 | |
2280 | for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { |
2281 | if (cxlr->coord[i].read_bandwidth) { |
2282 | rc = 0; |
2283 | if (cxl_need_node_perf_attrs_update(nid)) |
2284 | node_set_perf_attrs(nid, &cxlr->coord[i], i);
2285 | else
2286 | rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
2287 | |
2288 | if (rc == 0) |
2289 | cset++; |
2290 | } |
2291 | } |
2292 | |
2293 | if (!cset) |
2294 | return false; |
2295 | |
2296 | rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2297 | if (rc) |
2298 | dev_dbg(&cxlr->dev, "Failed to update access0 group\n"); |
2299 | |
2300 | rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2301 | if (rc) |
2302 | dev_dbg(&cxlr->dev, "Failed to update access1 group\n"); |
2303 | |
2304 | return true; |
2305 | } |
2306 | |
2307 | static int cxl_region_perf_attrs_callback(struct notifier_block *nb, |
2308 | unsigned long action, void *arg) |
2309 | { |
2310 | struct cxl_region *cxlr = container_of(nb, struct cxl_region, |
2311 | memory_notifier); |
2312 | struct cxl_region_params *p = &cxlr->params; |
2313 | struct cxl_endpoint_decoder *cxled = p->targets[0]; |
2314 | struct cxl_decoder *cxld = &cxled->cxld; |
2315 | struct memory_notify *mnb = arg; |
2316 | int nid = mnb->status_change_nid; |
2317 | int region_nid; |
2318 | |
2319 | if (nid == NUMA_NO_NODE || action != MEM_ONLINE) |
2320 | return NOTIFY_DONE; |
2321 | |
2322 | region_nid = phys_to_target_node(cxld->hpa_range.start);
2323 | if (nid != region_nid) |
2324 | return NOTIFY_DONE; |
2325 | |
2326 | if (!cxl_region_update_coordinates(cxlr, nid)) |
2327 | return NOTIFY_DONE; |
2328 | |
2329 | return NOTIFY_OK; |
2330 | } |
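/*
 * Worked example of the notifier check above (assumed values): if the
 * region decodes HPA 0x100000000 and phys_to_target_node() maps that
 * address to node 1, then a MEM_ONLINE event for node 1 refreshes the
 * access0/access1 coordinate groups, while events for any other node,
 * or any non-online action, return NOTIFY_DONE.
 */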
2331 | |
2332 | /** |
2333 | * devm_cxl_add_region - Adds a region to a decoder |
2334 | * @cxlrd: root decoder |
2335 | * @id: memregion id to create; it is released via memregion_free() on failure
2336 | * @mode: mode for the endpoint decoders of this region |
2337 | * @type: select whether this is an expander or accelerator (type-2 or type-3) |
2338 | * |
2339 | * This is the second step of region initialization. Regions exist within an |
2340 | * address space which is mapped by a @cxlrd. |
2341 | * |
2342 | * Return: a new &struct cxl_region on success, else an ERR_PTR() encoded negative
2343 | * error code. The region will be named "regionZ" where Z is the unique region number.
2344 | */ |
2345 | static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, |
2346 | int id, |
2347 | enum cxl_decoder_mode mode, |
2348 | enum cxl_decoder_type type) |
2349 | { |
2350 | struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2351 | struct cxl_region *cxlr; |
2352 | struct device *dev; |
2353 | int rc; |
2354 | |
2355 | switch (mode) { |
2356 | case CXL_DECODER_RAM: |
2357 | case CXL_DECODER_PMEM: |
2358 | break; |
2359 | default: |
2360 | dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); |
2361 | return ERR_PTR(-EINVAL);
2362 | } |
2363 | |
2364 | cxlr = cxl_region_alloc(cxlrd, id); |
2365 | if (IS_ERR(cxlr))
2366 | return cxlr; |
2367 | cxlr->mode = mode; |
2368 | cxlr->type = type; |
2369 | |
2370 | dev = &cxlr->dev; |
2371 | rc = dev_set_name(dev, "region%d", id);
2372 | if (rc) |
2373 | goto err; |
2374 | |
2375 | rc = device_add(dev); |
2376 | if (rc) |
2377 | goto err; |
2378 | |
2379 | cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; |
2380 | cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; |
2381 | register_memory_notifier(&cxlr->memory_notifier);
2382 | |
2383 | rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); |
2384 | if (rc) |
2385 | return ERR_PTR(rc);
2386 | |
2387 | dev_dbg(port->uport_dev, "%s: created %s\n", |
2388 | dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev)); |
2389 | return cxlr; |
2390 | |
2391 | err: |
2392 | put_device(dev); |
2393 | return ERR_PTR(rc);
2394 | } |
2395 | |
2396 | static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) |
2397 | { |
2398 | return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2399 | } |
2400 | |
2401 | static ssize_t create_pmem_region_show(struct device *dev, |
2402 | struct device_attribute *attr, char *buf) |
2403 | { |
2404 | return __create_region_show(to_cxl_root_decoder(dev), buf);
2405 | } |
2406 | |
2407 | static ssize_t create_ram_region_show(struct device *dev, |
2408 | struct device_attribute *attr, char *buf) |
2409 | { |
2410 | return __create_region_show(to_cxl_root_decoder(dev), buf);
2411 | } |
2412 | |
2413 | static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, |
2414 | enum cxl_decoder_mode mode, int id) |
2415 | { |
2416 | int rc; |
2417 | |
2418 | rc = memregion_alloc(GFP_KERNEL); |
2419 | if (rc < 0) |
2420 | return ERR_PTR(rc);
2421 | 
2422 | if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2423 | memregion_free(rc);
2424 | return ERR_PTR(-EBUSY);
2425 | } |
2426 | |
2427 | return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2428 | } |
2429 | |
2430 | static ssize_t create_pmem_region_store(struct device *dev, |
2431 | struct device_attribute *attr, |
2432 | const char *buf, size_t len) |
2433 | { |
2434 | struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); |
2435 | struct cxl_region *cxlr; |
2436 | int rc, id; |
2437 | |
2438 | rc = sscanf(buf, "region%d\n", &id); |
2439 | if (rc != 1) |
2440 | return -EINVAL; |
2441 | |
2442 | cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
2443 | if (IS_ERR(cxlr))
2444 | return PTR_ERR(cxlr);
2445 | |
2446 | return len; |
2447 | } |
2448 | DEVICE_ATTR_RW(create_pmem_region); |
2449 | |
2450 | static ssize_t create_ram_region_store(struct device *dev, |
2451 | struct device_attribute *attr, |
2452 | const char *buf, size_t len) |
2453 | { |
2454 | struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); |
2455 | struct cxl_region *cxlr; |
2456 | int rc, id; |
2457 | |
2458 | rc = sscanf(buf, "region%d\n", &id); |
2459 | if (rc != 1) |
2460 | return -EINVAL; |
2461 | |
2462 | cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
2463 | if (IS_ERR(cxlr))
2464 | return PTR_ERR(cxlr);
2465 | |
2466 | return len; |
2467 | } |
2468 | DEVICE_ATTR_RW(create_ram_region); |
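/*
 * User-space sketch of the create flow (device names hypothetical for
 * root decoder "decoder0.0"): reading create_ram_region returns the
 * next region name, and writing that same name back allocates it, as
 * __create_region() enforces with atomic_cmpxchg() above:
 *
 *	char name[16];
 *	int fd = open("/sys/bus/cxl/devices/decoder0.0/create_ram_region",
 *		      O_RDWR);
 *	ssize_t n = read(fd, name, sizeof(name) - 1);	// e.g. "region0\n"
 *	if (n > 0)
 *		write(fd, name, n);	// creates .../region0
 *	close(fd);
 */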
2469 | |
2470 | static ssize_t region_show(struct device *dev, struct device_attribute *attr, |
2471 | char *buf) |
2472 | { |
2473 | struct cxl_decoder *cxld = to_cxl_decoder(dev); |
2474 | ssize_t rc; |
2475 | |
2476 | rc = down_read_interruptible(&cxl_region_rwsem);
2477 | if (rc) |
2478 | return rc; |
2479 | |
2480 | if (cxld->region) |
2481 | rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2482 | else
2483 | rc = sysfs_emit(buf, "\n");
2484 | up_read(&cxl_region_rwsem);
2485 | |
2486 | return rc; |
2487 | } |
2488 | DEVICE_ATTR_RO(region); |
2489 | |
2490 | static struct cxl_region * |
2491 | cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name) |
2492 | { |
2493 | struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; |
2494 | struct device *region_dev; |
2495 | |
2496 | region_dev = device_find_child_by_name(&cxld->dev, name);
2497 | if (!region_dev)
2498 | return ERR_PTR(-ENODEV);
2499 | |
2500 | return to_cxl_region(region_dev);
2501 | } |
2502 | |
2503 | static ssize_t delete_region_store(struct device *dev, |
2504 | struct device_attribute *attr, |
2505 | const char *buf, size_t len) |
2506 | { |
2507 | struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); |
2508 | struct cxl_port *port = to_cxl_port(dev->parent);
2509 | struct cxl_region *cxlr; |
2510 | |
2511 | cxlr = cxl_find_region_by_name(cxlrd, buf);
2512 | if (IS_ERR(cxlr))
2513 | return PTR_ERR(cxlr);
2514 | 
2515 | devm_release_action(port->uport_dev, unregister_region, cxlr);
2516 | put_device(&cxlr->dev);
2517 | |
2518 | return len; |
2519 | } |
2520 | DEVICE_ATTR_WO(delete_region); |
2521 | |
2522 | static void cxl_pmem_region_release(struct device *dev) |
2523 | { |
2524 | struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); |
2525 | int i; |
2526 | |
2527 | for (i = 0; i < cxlr_pmem->nr_mappings; i++) { |
2528 | struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd; |
2529 | |
2530 | put_device(&cxlmd->dev);
2531 | } |
2532 | |
2533 | kfree(cxlr_pmem);
2534 | } |
2535 | |
2536 | static const struct attribute_group *cxl_pmem_region_attribute_groups[] = { |
2537 | &cxl_base_attribute_group, |
2538 | NULL, |
2539 | }; |
2540 | |
2541 | const struct device_type cxl_pmem_region_type = { |
2542 | .name = "cxl_pmem_region", |
2543 | .release = cxl_pmem_region_release, |
2544 | .groups = cxl_pmem_region_attribute_groups, |
2545 | }; |
2546 | |
2547 | bool is_cxl_pmem_region(struct device *dev) |
2548 | { |
2549 | return dev->type == &cxl_pmem_region_type; |
2550 | } |
2551 | EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL); |
2552 | |
2553 | struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) |
2554 | { |
2555 | if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev), |
2556 | "not a cxl_pmem_region device\n")) |
2557 | return NULL; |
2558 | return container_of(dev, struct cxl_pmem_region, dev); |
2559 | } |
2560 | EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL); |
2561 | |
2562 | struct cxl_poison_context { |
2563 | struct cxl_port *port; |
2564 | enum cxl_decoder_mode mode; |
2565 | u64 offset; |
2566 | }; |
2567 | |
2568 | static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, |
2569 | struct cxl_poison_context *ctx) |
2570 | { |
2571 | struct cxl_dev_state *cxlds = cxlmd->cxlds; |
2572 | u64 offset, length; |
2573 | int rc = 0; |
2574 | |
2575 | /* |
2576 | * Collect poison for the remaining unmapped resources |
2577 | * after poison is collected by committed endpoints. |
2578 | * |
2579 | * Knowing that PMEM must always follow RAM, get poison |
2580 | * for unmapped resources based on the last decoder's mode: |
2581 | * ram: scan remains of ram range, then any pmem range |
2582 | * pmem: scan remains of pmem range |
2583 | */ |
2584 | |
2585 | if (ctx->mode == CXL_DECODER_RAM) { |
2586 | offset = ctx->offset; |
2587 | length = resource_size(&cxlds->ram_res) - offset;
2588 | rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2589 | if (rc == -EFAULT) |
2590 | rc = 0; |
2591 | if (rc) |
2592 | return rc; |
2593 | } |
2594 | if (ctx->mode == CXL_DECODER_PMEM) { |
2595 | offset = ctx->offset; |
2596 | length = resource_size(&cxlds->dpa_res) - offset;
2597 | if (!length) |
2598 | return 0; |
2599 | } else if (resource_size(&cxlds->pmem_res)) {
2600 | offset = cxlds->pmem_res.start;
2601 | length = resource_size(&cxlds->pmem_res);
2602 | } else { |
2603 | return 0; |
2604 | } |
2605 | |
2606 | return cxl_mem_get_poison(cxlmd, offset, length, NULL);
2607 | } |
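/*
 * Worked example of the scan above (assumed DPA layout): given dpa_res
 * 0-0x1ffffffff with ram_res 0-0xffffffff and pmem_res
 * 0x100000000-0x1ffffffff, if the last committed decoder was ram mode
 * and ended at 0x7fffffff, ctx->offset is 0x80000000: the ram
 * remainder (0x80000000-0xffffffff) is scanned first, then the whole
 * pmem range. Had the last decoder been pmem mode, only the pmem
 * remainder past ctx->offset would be scanned.
 */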
2608 | |
2609 | static int poison_by_decoder(struct device *dev, void *arg) |
2610 | { |
2611 | struct cxl_poison_context *ctx = arg; |
2612 | struct cxl_endpoint_decoder *cxled; |
2613 | struct cxl_memdev *cxlmd; |
2614 | u64 offset, length; |
2615 | int rc = 0; |
2616 | |
2617 | if (!is_endpoint_decoder(dev)) |
2618 | return rc; |
2619 | |
2620 | cxled = to_cxl_endpoint_decoder(dev); |
2621 | if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
2622 | return rc; |
2623 | |
2624 | /* |
2625 | * Regions are only created with single mode decoders: pmem or ram. |
2626 | * Linux does not support mixed mode decoders. This means that |
2627 | * reading poison per endpoint decoder adheres to the requirement |
2628 | * that poison reads of pmem and ram must be separated. |
2629 | * CXL 3.0 Spec 8.2.9.8.4.1 |
2630 | */ |
2631 | if (cxled->mode == CXL_DECODER_MIXED) { |
2632 | dev_dbg(dev, "poison list read unsupported in mixed mode\n"); |
2633 | return rc; |
2634 | } |
2635 | |
2636 | cxlmd = cxled_to_memdev(cxled); |
2637 | if (cxled->skip) { |
2638 | offset = cxled->dpa_res->start - cxled->skip; |
2639 | length = cxled->skip; |
2640 | rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2641 | if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) |
2642 | rc = 0; |
2643 | if (rc) |
2644 | return rc; |
2645 | } |
2646 | |
2647 | offset = cxled->dpa_res->start; |
2648 | length = cxled->dpa_res->end - offset + 1; |
2649 | rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
2650 | if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) |
2651 | rc = 0; |
2652 | if (rc) |
2653 | return rc; |
2654 | |
2655 | /* Iterate until commit_end is reached */ |
2656 | if (cxled->cxld.id == ctx->port->commit_end) { |
2657 | ctx->offset = cxled->dpa_res->end + 1; |
2658 | ctx->mode = cxled->mode; |
2659 | return 1; |
2660 | } |
2661 | |
2662 | return 0; |
2663 | } |
2664 | |
2665 | int cxl_get_poison_by_endpoint(struct cxl_port *port) |
2666 | { |
2667 | struct cxl_poison_context ctx; |
2668 | int rc = 0; |
2669 | |
2670 | ctx = (struct cxl_poison_context) { |
2671 | .port = port |
2672 | }; |
2673 | |
2674 | rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2675 | if (rc == 1)
2676 | rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2677 | &ctx);
2678 | |
2679 | return rc; |
2680 | } |
2681 | |
2682 | static struct lock_class_key cxl_pmem_region_key; |
2683 | |
2684 | static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) |
2685 | { |
2686 | struct cxl_region_params *p = &cxlr->params; |
2687 | struct cxl_nvdimm_bridge *cxl_nvb; |
2688 | struct cxl_pmem_region *cxlr_pmem; |
2689 | struct device *dev; |
2690 | int i; |
2691 | |
2692 | down_read(&cxl_region_rwsem);
2693 | if (p->state != CXL_CONFIG_COMMIT) {
2694 | cxlr_pmem = ERR_PTR(-ENXIO);
2695 | goto out; |
2696 | } |
2697 | |
2698 | cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), |
2699 | GFP_KERNEL); |
2700 | if (!cxlr_pmem) { |
2701 | cxlr_pmem = ERR_PTR(-ENOMEM);
2702 | goto out; |
2703 | } |
2704 | |
2705 | cxlr_pmem->hpa_range.start = p->res->start; |
2706 | cxlr_pmem->hpa_range.end = p->res->end; |
2707 | |
2708 | /* Snapshot the region configuration underneath the cxl_region_rwsem */ |
2709 | cxlr_pmem->nr_mappings = p->nr_targets; |
2710 | for (i = 0; i < p->nr_targets; i++) { |
2711 | struct cxl_endpoint_decoder *cxled = p->targets[i]; |
2712 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
2713 | struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; |
2714 | |
2715 | /* |
2716 | * Regions never span CXL root devices, so by definition the |
2717 | * bridge for one device is the same for all. |
2718 | */ |
2719 | if (i == 0) { |
2720 | cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); |
2721 | if (!cxl_nvb) { |
2722 | cxlr_pmem = ERR_PTR(-ENODEV);
2723 | goto out; |
2724 | } |
2725 | cxlr->cxl_nvb = cxl_nvb; |
2726 | } |
2727 | m->cxlmd = cxlmd; |
2728 | get_device(&cxlmd->dev);
2729 | m->start = cxled->dpa_res->start; |
2730 | m->size = resource_size(cxled->dpa_res);
2731 | m->position = i; |
2732 | } |
2733 | |
2734 | dev = &cxlr_pmem->dev; |
2735 | cxlr_pmem->cxlr = cxlr; |
2736 | cxlr->cxlr_pmem = cxlr_pmem; |
2737 | device_initialize(dev); |
2738 | lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); |
2739 | device_set_pm_not_required(dev); |
2740 | dev->parent = &cxlr->dev; |
2741 | dev->bus = &cxl_bus_type; |
2742 | dev->type = &cxl_pmem_region_type; |
2743 | out: |
2744 | up_read(&cxl_region_rwsem);
2745 | |
2746 | return cxlr_pmem; |
2747 | } |
2748 | |
2749 | static void cxl_dax_region_release(struct device *dev) |
2750 | { |
2751 | struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); |
2752 | |
2753 | kfree(cxlr_dax);
2754 | } |
2755 | |
2756 | static const struct attribute_group *cxl_dax_region_attribute_groups[] = { |
2757 | &cxl_base_attribute_group, |
2758 | NULL, |
2759 | }; |
2760 | |
2761 | const struct device_type cxl_dax_region_type = { |
2762 | .name = "cxl_dax_region", |
2763 | .release = cxl_dax_region_release, |
2764 | .groups = cxl_dax_region_attribute_groups, |
2765 | }; |
2766 | |
2767 | static bool is_cxl_dax_region(struct device *dev) |
2768 | { |
2769 | return dev->type == &cxl_dax_region_type; |
2770 | } |
2771 | |
2772 | struct cxl_dax_region *to_cxl_dax_region(struct device *dev) |
2773 | { |
2774 | if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), |
2775 | "not a cxl_dax_region device\n")) |
2776 | return NULL; |
2777 | return container_of(dev, struct cxl_dax_region, dev); |
2778 | } |
2779 | EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); |
2780 | |
2781 | static struct lock_class_key cxl_dax_region_key; |
2782 | |
2783 | static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) |
2784 | { |
2785 | struct cxl_region_params *p = &cxlr->params; |
2786 | struct cxl_dax_region *cxlr_dax; |
2787 | struct device *dev; |
2788 | |
2789 | down_read(&cxl_region_rwsem);
2790 | if (p->state != CXL_CONFIG_COMMIT) {
2791 | cxlr_dax = ERR_PTR(-ENXIO);
2792 | goto out; |
2793 | } |
2794 | |
2795 | cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
2796 | if (!cxlr_dax) {
2797 | cxlr_dax = ERR_PTR(-ENOMEM);
2798 | goto out; |
2799 | } |
2800 | |
2801 | cxlr_dax->hpa_range.start = p->res->start; |
2802 | cxlr_dax->hpa_range.end = p->res->end; |
2803 | |
2804 | dev = &cxlr_dax->dev; |
2805 | cxlr_dax->cxlr = cxlr; |
2806 | device_initialize(dev); |
2807 | lockdep_set_class(&dev->mutex, &cxl_dax_region_key); |
2808 | device_set_pm_not_required(dev); |
2809 | dev->parent = &cxlr->dev; |
2810 | dev->bus = &cxl_bus_type; |
2811 | dev->type = &cxl_dax_region_type; |
2812 | out: |
2813 | up_read(&cxl_region_rwsem);
2814 | |
2815 | return cxlr_dax; |
2816 | } |
2817 | |
2818 | static void cxlr_pmem_unregister(void *_cxlr_pmem) |
2819 | { |
2820 | struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; |
2821 | struct cxl_region *cxlr = cxlr_pmem->cxlr; |
2822 | struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; |
2823 | |
2824 | /* |
2825 | * Either the bridge is in ->remove() context under the device_lock(), |
2826 | * or cxlr_release_nvdimm() is cancelling the bridge's release action |
2827 | * for @cxlr_pmem and doing it itself (while manually holding the bridge |
2828 | * lock). |
2829 | */ |
2830 | device_lock_assert(&cxl_nvb->dev);
2831 | cxlr->cxlr_pmem = NULL;
2832 | cxlr_pmem->cxlr = NULL;
2833 | device_unregister(&cxlr_pmem->dev);
2834 | } |
2835 | |
2836 | static void cxlr_release_nvdimm(void *_cxlr) |
2837 | { |
2838 | struct cxl_region *cxlr = _cxlr; |
2839 | struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; |
2840 | |
2841 | device_lock(&cxl_nvb->dev);
2842 | if (cxlr->cxlr_pmem)
2843 | devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
2844 | cxlr->cxlr_pmem);
2845 | device_unlock(&cxl_nvb->dev);
2846 | cxlr->cxl_nvb = NULL;
2847 | put_device(&cxl_nvb->dev);
2848 | } |
2849 | |
2850 | /** |
2851 | * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge |
2852 | * @cxlr: parent CXL region for this pmem region bridge device |
2853 | * |
2854 | * Return: 0 on success, negative error code on failure.
2855 | */ |
2856 | static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) |
2857 | { |
2858 | struct cxl_pmem_region *cxlr_pmem; |
2859 | struct cxl_nvdimm_bridge *cxl_nvb; |
2860 | struct device *dev; |
2861 | int rc; |
2862 | |
2863 | cxlr_pmem = cxl_pmem_region_alloc(cxlr); |
2864 | if (IS_ERR(cxlr_pmem))
2865 | return PTR_ERR(cxlr_pmem);
2866 | cxl_nvb = cxlr->cxl_nvb; |
2867 | |
2868 | dev = &cxlr_pmem->dev; |
2869 | rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
2870 | if (rc) |
2871 | goto err; |
2872 | |
2873 | rc = device_add(dev); |
2874 | if (rc) |
2875 | goto err; |
2876 | |
2877 | dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), |
2878 | dev_name(dev)); |
2879 | |
2880 | device_lock(&cxl_nvb->dev);
2881 | if (cxl_nvb->dev.driver) |
2882 | rc = devm_add_action_or_reset(&cxl_nvb->dev, |
2883 | cxlr_pmem_unregister, cxlr_pmem); |
2884 | else |
2885 | rc = -ENXIO; |
2886 | device_unlock(&cxl_nvb->dev);
2887 | |
2888 | if (rc) |
2889 | goto err_bridge; |
2890 | |
2891 | /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */ |
2892 | return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr); |
2893 | |
2894 | err: |
2895 | put_device(dev); |
2896 | err_bridge: |
2897 | put_device(&cxl_nvb->dev);
2898 | cxlr->cxl_nvb = NULL; |
2899 | return rc; |
2900 | } |
2901 | |
2902 | static void cxlr_dax_unregister(void *_cxlr_dax) |
2903 | { |
2904 | struct cxl_dax_region *cxlr_dax = _cxlr_dax; |
2905 | |
2906 | device_unregister(&cxlr_dax->dev);
2907 | } |
2908 | |
2909 | static int devm_cxl_add_dax_region(struct cxl_region *cxlr) |
2910 | { |
2911 | struct cxl_dax_region *cxlr_dax; |
2912 | struct device *dev; |
2913 | int rc; |
2914 | |
2915 | cxlr_dax = cxl_dax_region_alloc(cxlr); |
2916 | if (IS_ERR(cxlr_dax))
2917 | return PTR_ERR(cxlr_dax);
2918 | |
2919 | dev = &cxlr_dax->dev; |
2920 | rc = dev_set_name(dev, "dax_region%d", cxlr->id);
2921 | if (rc) |
2922 | goto err; |
2923 | |
2924 | rc = device_add(dev); |
2925 | if (rc) |
2926 | goto err; |
2927 | |
2928 | dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), |
2929 | dev_name(dev)); |
2930 | |
2931 | return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, |
2932 | cxlr_dax); |
2933 | err: |
2934 | put_device(dev); |
2935 | return rc; |
2936 | } |
2937 | |
2938 | static int match_root_decoder_by_range(struct device *dev, void *data) |
2939 | { |
2940 | struct range *r1, *r2 = data; |
2941 | struct cxl_root_decoder *cxlrd; |
2942 | |
2943 | if (!is_root_decoder(dev)) |
2944 | return 0; |
2945 | |
2946 | cxlrd = to_cxl_root_decoder(dev); |
2947 | r1 = &cxlrd->cxlsd.cxld.hpa_range; |
2948 | return range_contains(r1, r2); |
2949 | } |
2950 | |
2951 | static int match_region_by_range(struct device *dev, void *data) |
2952 | { |
2953 | struct cxl_region_params *p; |
2954 | struct cxl_region *cxlr; |
2955 | struct range *r = data; |
2956 | int rc = 0; |
2957 | |
2958 | if (!is_cxl_region(dev)) |
2959 | return 0; |
2960 | |
2961 | cxlr = to_cxl_region(dev); |
2962 | p = &cxlr->params; |
2963 | |
2964 | down_read(&cxl_region_rwsem);
2965 | if (p->res && p->res->start == r->start && p->res->end == r->end)
2966 | rc = 1;
2967 | up_read(&cxl_region_rwsem);
2968 | |
2969 | return rc; |
2970 | } |
2971 | |
2972 | /* Establish an empty region covering the given HPA range */ |
2973 | static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, |
2974 | struct cxl_endpoint_decoder *cxled) |
2975 | { |
2976 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
2977 | struct cxl_port *port = cxlrd_to_port(cxlrd); |
2978 | struct range *hpa = &cxled->cxld.hpa_range; |
2979 | struct cxl_region_params *p; |
2980 | struct cxl_region *cxlr; |
2981 | struct resource *res; |
2982 | int rc; |
2983 | |
2984 | do { |
2985 | cxlr = __create_region(cxlrd, cxled->mode,
2986 | atomic_read(&cxlrd->region_id));
2987 | } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
2988 | 
2989 | if (IS_ERR(cxlr)) {
2990 | dev_err(cxlmd->dev.parent, |
2991 | "%s:%s: %s failed assign region: %ld\n", |
2992 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
2993 | __func__, PTR_ERR(cxlr)); |
2994 | return cxlr; |
2995 | } |
2996 | |
2997 | down_write(&cxl_region_rwsem);
2998 | p = &cxlr->params; |
2999 | if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { |
3000 | dev_err(cxlmd->dev.parent, |
3001 | "%s:%s: %s autodiscovery interrupted\n", |
3002 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
3003 | __func__); |
3004 | rc = -EBUSY; |
3005 | goto err; |
3006 | } |
3007 | |
3008 | set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3009 | |
3010 | res = kmalloc(sizeof(*res), GFP_KERNEL);
3011 | if (!res) { |
3012 | rc = -ENOMEM; |
3013 | goto err; |
3014 | } |
3015 | |
3016 | *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), |
3017 | dev_name(&cxlr->dev)); |
3018 | rc = insert_resource(cxlrd->res, res);
3019 | if (rc) { |
3020 | /* |
3021 | * Platform-firmware may not have split resources like "System |
3022 | * RAM" on CXL window boundaries see cxl_region_iomem_release() |
3023 | */ |
3024 | dev_warn(cxlmd->dev.parent, |
3025 | "%s:%s: %s %s cannot insert resource\n", |
3026 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), |
3027 | __func__, dev_name(&cxlr->dev)); |
3028 | } |
3029 | |
3030 | p->res = res; |
3031 | p->interleave_ways = cxled->cxld.interleave_ways; |
3032 | p->interleave_granularity = cxled->cxld.interleave_granularity; |
3033 | p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; |
3034 | |
3035 | rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3036 | if (rc) |
3037 | goto err; |
3038 | |
3039 | dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", |
3040 | dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, |
3041 | dev_name(&cxlr->dev), p->res, p->interleave_ways, |
3042 | p->interleave_granularity); |
3043 | |
3044 | /* ...to match put_device() in cxl_add_to_region() */ |
3045 | get_device(&cxlr->dev);
3046 | up_write(&cxl_region_rwsem);
3047 | |
3048 | return cxlr; |
3049 | |
3050 | err: |
3051 | up_write(&cxl_region_rwsem);
3052 | devm_release_action(port->uport_dev, unregister_region, cxlr);
3053 | return ERR_PTR(rc);
3054 | } |
3055 | |
3056 | int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) |
3057 | { |
3058 | struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); |
3059 | struct range *hpa = &cxled->cxld.hpa_range; |
3060 | struct cxl_decoder *cxld = &cxled->cxld; |
3061 | struct device *cxlrd_dev, *region_dev; |
3062 | struct cxl_root_decoder *cxlrd; |
3063 | struct cxl_region_params *p; |
3064 | struct cxl_region *cxlr; |
3065 | bool attach = false; |
3066 | int rc; |
3067 | |
3068 | cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
3069 | match_root_decoder_by_range);
3070 | if (!cxlrd_dev) { |
3071 | dev_err(cxlmd->dev.parent, |
3072 | "%s:%s no CXL window for range %#llx:%#llx\n", |
3073 | dev_name(&cxlmd->dev), dev_name(&cxld->dev), |
3074 | cxld->hpa_range.start, cxld->hpa_range.end); |
3075 | return -ENXIO; |
3076 | } |
3077 | |
3078 | cxlrd = to_cxl_root_decoder(cxlrd_dev);
3079 | |
3080 | /* |
3081 | * Ensure that if multiple threads race to construct_region() for @hpa |
3082 | * one does the construction and the others add to that. |
3083 | */ |
3084 | mutex_lock(&cxlrd->range_lock); |
3085 | region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
3086 | match_region_by_range);
3087 | if (!region_dev) { |
3088 | cxlr = construct_region(cxlrd, cxled); |
3089 | region_dev = &cxlr->dev; |
3090 | } else |
3091 | cxlr = to_cxl_region(region_dev);
3092 | mutex_unlock(&cxlrd->range_lock);
3093 | |
3094 | rc = PTR_ERR_OR_ZERO(cxlr);
3095 | if (rc) |
3096 | goto out; |
3097 | |
3098 | attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3099 | |
3100 | down_read(&cxl_region_rwsem);
3101 | p = &cxlr->params;
3102 | attach = p->state == CXL_CONFIG_COMMIT;
3103 | up_read(&cxl_region_rwsem);
3104 | |
3105 | if (attach) { |
3106 | /* |
3107 | * If device_attach() fails the range may still be active via |
3108 | * the platform-firmware memory map, otherwise the driver for |
3109 | * regions is local to this file, so driver matching can't fail. |
3110 | */ |
3111 | if (device_attach(&cxlr->dev) < 0)
3112 | dev_err(&cxlr->dev, "failed to enable, range: %pr\n", |
3113 | p->res); |
3114 | } |
3115 | |
3116 | put_device(region_dev);
3117 | out:
3118 | put_device(cxlrd_dev);
3119 | return rc; |
3120 | } |
3121 | EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); |
3122 | |
3123 | static int is_system_ram(struct resource *res, void *arg) |
3124 | { |
3125 | struct cxl_region *cxlr = arg; |
3126 | struct cxl_region_params *p = &cxlr->params; |
3127 | |
3128 | dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res); |
3129 | return 1; |
3130 | } |
3131 | |
3132 | static int cxl_region_probe(struct device *dev) |
3133 | { |
3134 | struct cxl_region *cxlr = to_cxl_region(dev); |
3135 | struct cxl_region_params *p = &cxlr->params; |
3136 | int rc; |
3137 | |
3138 | rc = down_read_interruptible(&cxl_region_rwsem);
3139 | if (rc) { |
3140 | dev_dbg(&cxlr->dev, "probe interrupted\n"); |
3141 | return rc; |
3142 | } |
3143 | |
3144 | if (p->state < CXL_CONFIG_COMMIT) { |
3145 | dev_dbg(&cxlr->dev, "config state: %d\n", p->state); |
3146 | rc = -ENXIO; |
3147 | goto out; |
3148 | } |
3149 | |
3150 | if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) { |
3151 | dev_err(&cxlr->dev, |
3152 | "failed to activate, re-commit region and retry\n"); |
3153 | rc = -ENXIO; |
3154 | goto out; |
3155 | } |
3156 | |
3157 | /* |
3158 | * From this point on any path that changes the region's state away from |
3159 | * CXL_CONFIG_COMMIT is also responsible for releasing the driver. |
3160 | */ |
3161 | out: |
3162 | up_read(&cxl_region_rwsem);
3163 | |
3164 | if (rc) |
3165 | return rc; |
3166 | |
3167 | switch (cxlr->mode) { |
3168 | case CXL_DECODER_PMEM: |
3169 | return devm_cxl_add_pmem_region(cxlr); |
3170 | case CXL_DECODER_RAM: |
3171 | /* |
3172 | * The region cannot be managed by CXL if any portion of
3173 | * it is already online as 'System RAM' |
3174 | */ |
3175 | if (walk_iomem_res_desc(IORES_DESC_NONE,
3176 | IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
3177 | p->res->start, p->res->end, cxlr,
3178 | is_system_ram) > 0)
3179 | return 0; |
3180 | return devm_cxl_add_dax_region(cxlr); |
3181 | default: |
3182 | dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", |
3183 | cxlr->mode); |
3184 | return -ENXIO; |
3185 | } |
3186 | } |
3187 | |
3188 | static struct cxl_driver cxl_region_driver = { |
3189 | .name = "cxl_region", |
3190 | .probe = cxl_region_probe, |
3191 | .id = CXL_DEVICE_REGION, |
3192 | }; |
3193 | |
3194 | int cxl_region_init(void) |
3195 | { |
3196 | return cxl_driver_register(&cxl_region_driver); |
3197 | } |
3198 | |
3199 | void cxl_region_exit(void) |
3200 | { |
3201 | cxl_driver_unregister(&cxl_region_driver);
3202 | } |
3203 | |
3204 | MODULE_IMPORT_NS(CXL); |
3205 | MODULE_IMPORT_NS(DEVMEM); |
3206 | MODULE_ALIAS_CXL(CXL_DEVICE_REGION); |
3207 |