// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/log2.h>

static struct workqueue_struct *dm_stripe_wq;

#define DM_MSG_PREFIX "striped"
#define DM_IO_ERROR_THRESHOLD 15

struct stripe {
	struct dm_dev *dev;
	sector_t physical_start;

	atomic_t error_count;
};

struct stripe_c {
	uint32_t stripes;
	int stripes_shift;

	/* The size of this target / num. stripes */
	sector_t stripe_width;

	uint32_t chunk_size;
	int chunk_size_shift;

	/* Needed for handling events */
	struct dm_target *ti;

	/* Work struct used for triggering events */
	struct work_struct trigger_event;

	struct stripe stripe[] __counted_by(stripes);
};

/*
 * An event is triggered whenever a drive
 * drops out of a stripe volume.
 */
static void trigger_event(struct work_struct *work)
{
	struct stripe_c *sc = container_of(work, struct stripe_c,
					   trigger_event);
	dm_table_event(sc->ti->table);
}

/*
 * Parse a single <dev> <sector> pair
 */
static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
		      unsigned int stripe, char **argv)
{
	unsigned long long start;
	char dummy;
	int ret;

	if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
		return -EINVAL;

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			    &sc->stripe[stripe].dev);
	if (ret)
		return ret;

	sc->stripe[stripe].physical_start = start;

	return 0;
}

/*
 * Construct a striped mapping.
 * <number of stripes> <chunk size> [<dev_path> <offset>]+
 */
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct stripe_c *sc;
	sector_t width, tmp_len;
	uint32_t stripes;
	uint32_t chunk_size;
	int r;
	unsigned int i;

	if (argc < 2) {
		ti->error = "Not enough arguments";
		return -EINVAL;
	}

	if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
		ti->error = "Invalid stripe count";
		return -EINVAL;
	}

	if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
		ti->error = "Invalid chunk_size";
		return -EINVAL;
	}

	width = ti->len;
	if (sector_div(width, stripes)) {
		ti->error = "Target length not divisible by number of stripes";
		return -EINVAL;
	}

	tmp_len = width;
	if (sector_div(tmp_len, chunk_size)) {
		ti->error = "Target length not divisible by chunk size";
		return -EINVAL;
	}

	/*
	 * Do we have enough arguments for that many stripes?
	 */
	if (argc != (2 + 2 * stripes)) {
		ti->error = "Not enough destinations specified";
		return -EINVAL;
	}

	sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
	if (!sc) {
		ti->error = "Memory allocation for striped context failed";
		return -ENOMEM;
	}

	INIT_WORK(&sc->trigger_event, trigger_event);

	/* Set pointer to dm target; used in trigger_event */
	sc->ti = ti;
	sc->stripes = stripes;
	sc->stripe_width = width;

	if (stripes & (stripes - 1))
		sc->stripes_shift = -1;
	else
		sc->stripes_shift = __ffs(stripes);

	r = dm_set_target_max_io_len(ti, chunk_size);
	if (r) {
		kfree(sc);
		return r;
	}

	ti->num_flush_bios = stripes;
	ti->num_discard_bios = stripes;
	ti->num_secure_erase_bios = stripes;
	ti->num_write_zeroes_bios = stripes;

	sc->chunk_size = chunk_size;
	if (chunk_size & (chunk_size - 1))
		sc->chunk_size_shift = -1;
	else
		sc->chunk_size_shift = __ffs(chunk_size);

	/*
	 * Get the stripe destinations.
	 */
	for (i = 0; i < stripes; i++) {
		argv += 2;

		r = get_stripe(ti, sc, i, argv);
		if (r < 0) {
			ti->error = "Couldn't parse stripe destination";
			while (i--)
				dm_put_device(ti, sc->stripe[i].dev);
			kfree(sc);
			return r;
		}
		atomic_set(&(sc->stripe[i].error_count), 0);
	}

	ti->private = sc;

	return 0;
}

static void stripe_dtr(struct dm_target *ti)
{
	unsigned int i;
	struct stripe_c *sc = ti->private;

	for (i = 0; i < sc->stripes; i++)
		dm_put_device(ti, sc->stripe[i].dev);

	flush_work(&sc->trigger_event);
	kfree(sc);
}

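/*
 * Map a target-relative sector to a (stripe number, sector within that
 * stripe's device) pair.  The address space is cut into chunk_size-sector
 * chunks dealt out round-robin across the stripes.  Worked example with
 * hypothetical values chunk_size = 8 and stripes = 2: sector 21 is chunk 2,
 * offset 5; chunk 2 falls on stripe 0 as that stripe's chunk 1, so *result
 * becomes 1 * 8 + 5 = 13 (the caller still adds physical_start).
 */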
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
			      uint32_t *stripe, sector_t *result)
{
	sector_t chunk = dm_target_offset(sc->ti, sector);
	sector_t chunk_offset;

	if (sc->chunk_size_shift < 0)
		chunk_offset = sector_div(chunk, sc->chunk_size);
	else {
		chunk_offset = chunk & (sc->chunk_size - 1);
		chunk >>= sc->chunk_size_shift;
	}

	if (sc->stripes_shift < 0)
		*stripe = sector_div(chunk, sc->stripes);
	else {
		*stripe = chunk & (sc->stripes - 1);
		chunk >>= sc->stripes_shift;
	}

	if (sc->chunk_size_shift < 0)
		chunk *= sc->chunk_size;
	else
		chunk <<= sc->chunk_size_shift;

	*result = chunk + chunk_offset;
}

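/*
 * Map a range boundary onto @target_stripe: when @sector maps to a
 * different stripe, snap the result to a chunk boundary on
 * @target_stripe, rounding down, or up to the next chunk when the
 * target stripe precedes the stripe the sector actually fell on.
 */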
static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
				    uint32_t target_stripe, sector_t *result)
{
	uint32_t stripe;

	stripe_map_sector(sc, sector, &stripe, result);
	if (stripe == target_stripe)
		return;

	/* round down */
	sector = *result;
	if (sc->chunk_size_shift < 0)
		*result -= sector_div(sector, sc->chunk_size);
	else
		*result = sector & ~(sector_t)(sc->chunk_size - 1);

	if (target_stripe < stripe)
		*result += sc->chunk_size;	/* next chunk */
}

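/*
 * Clip a range bio (discard, secure erase, write zeroes) to the portion
 * of the range that lives on @target_stripe; if nothing in the range
 * touches that stripe, the bio is completed immediately.
 */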
static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
			    uint32_t target_stripe)
{
	sector_t begin, end;

	stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
				target_stripe, &begin);
	stripe_map_range_sector(sc, bio_end_sector(bio),
				target_stripe, &end);
	if (begin < end) {
		bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
		bio->bi_iter.bi_sector = begin +
			sc->stripe[target_stripe].physical_start;
		bio->bi_iter.bi_size = to_bytes(end - begin);
		return DM_MAPIO_REMAPPED;
	}

	/* The range doesn't map to the target stripe */
	bio_endio(bio);
	return DM_MAPIO_SUBMITTED;
}

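/*
 * Core bio remapping: flushes were cloned once per stripe by the dm core
 * (ti->num_flush_bios above), so each clone is simply pointed at its own
 * device; range operations are clipped to the clone's stripe; everything
 * else is redirected chunk-wise via stripe_map_sector().
 */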
static int stripe_map(struct dm_target *ti, struct bio *bio)
{
	struct stripe_c *sc = ti->private;
	uint32_t stripe;
	unsigned int target_bio_nr;

	if (bio->bi_opf & REQ_PREFLUSH) {
		target_bio_nr = dm_bio_get_target_bio_nr(bio);
		BUG_ON(target_bio_nr >= sc->stripes);
		bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
		return DM_MAPIO_REMAPPED;
	}
	if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
	    unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
	    unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES)) {
		target_bio_nr = dm_bio_get_target_bio_nr(bio);
		BUG_ON(target_bio_nr >= sc->stripes);
		return stripe_map_range(sc, bio, target_bio_nr);
	}

	stripe_map_sector(sc, bio->bi_iter.bi_sector,
			  &stripe, &bio->bi_iter.bi_sector);

	bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
	bio_set_dev(bio, sc->stripe[stripe].dev->bdev);

	return DM_MAPIO_REMAPPED;
}

#if IS_ENABLED(CONFIG_FS_DAX)
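/*
 * Translate a page offset within the target into the backing dax_device
 * and the page offset within it, mirroring stripe_map_sector()'s bio
 * remapping for DAX accesses.
 */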
static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
	struct stripe_c *sc = ti->private;
	struct block_device *bdev;
	sector_t dev_sector;
	uint32_t stripe;

	stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector);
	dev_sector += sc->stripe[stripe].physical_start;
	bdev = sc->stripe[stripe].dev->bdev;

	*pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
	return sc->stripe[stripe].dev->dax_dev;
}

static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
		long nr_pages, enum dax_access_mode mode, void **kaddr,
		pfn_t *pfn)
{
	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

	return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
}

static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
{
	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

	return dax_zero_page_range(dax_dev, pgoff, nr_pages);
}

static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

	return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
}

#else
#define stripe_dax_direct_access NULL
#define stripe_dax_zero_page_range NULL
#define stripe_dax_recovery_write NULL
#endif

/*
 * Stripe status:
 *
 * INFO
 * #stripes [stripe_name <stripe_name>] [group word count]
 * [error count 'A|D' <error count 'A|D'>]
 *
 * TABLE
 * #stripes [stripe chunk size]
 * [stripe_name physical_start <stripe_name physical_start>]
 *
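 * An illustrative INFO line for a hypothetical two-stripe device whose
 * stripe names are the devices' major:minor numbers: "2 8:16 8:32 1 AA"
 *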
 */

static void stripe_status(struct dm_target *ti, status_type_t type,
			  unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct stripe_c *sc = ti->private;
	unsigned int sz = 0;
	unsigned int i;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", sc->stripes);
		for (i = 0; i < sc->stripes; i++)
			DMEMIT("%s ", sc->stripe[i].dev->name);

		DMEMIT("1 ");
		for (i = 0; i < sc->stripes; i++)
			DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A');
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%d %llu", sc->stripes,
		       (unsigned long long)sc->chunk_size);
		for (i = 0; i < sc->stripes; i++)
			DMEMIT(" %s %llu", sc->stripe[i].dev->name,
			       (unsigned long long)sc->stripe[i].physical_start);
		break;

	case STATUSTYPE_IMA:
		DMEMIT_TARGET_NAME_VERSION(ti->type);
		DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes,
		       (unsigned long long)sc->chunk_size);

		for (i = 0; i < sc->stripes; i++) {
			DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name);
			DMEMIT(",stripe_%d_physical_start=%llu", i,
			       (unsigned long long)sc->stripe[i].physical_start);
			DMEMIT(",stripe_%d_status=%c", i,
			       atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A');
		}
		DMEMIT(";");
		break;
	}
}

static int stripe_end_io(struct dm_target *ti, struct bio *bio,
		blk_status_t *error)
{
	unsigned int i;
	char major_minor[16];
	struct stripe_c *sc = ti->private;

	if (!*error)
		return DM_ENDIO_DONE; /* I/O complete */

	if (bio->bi_opf & REQ_RAHEAD)
		return DM_ENDIO_DONE;

	if (*error == BLK_STS_NOTSUPP)
		return DM_ENDIO_DONE;

	memset(major_minor, 0, sizeof(major_minor));
	sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));

	/*
	 * Test to see which stripe drive triggered the event
	 * and increment error count for all stripes on that device.
	 * If the error count for a given device exceeds the threshold
	 * value we will no longer trigger any further events.
	 */
	for (i = 0; i < sc->stripes; i++)
		if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
			atomic_inc(&(sc->stripe[i].error_count));
			if (atomic_read(&(sc->stripe[i].error_count)) <
			    DM_IO_ERROR_THRESHOLD)
				queue_work(dm_stripe_wq, &sc->trigger_event);
		}

	return DM_ENDIO_DONE;
}

static int stripe_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct stripe_c *sc = ti->private;
	int ret = 0;
	unsigned int i = 0;

	do {
		ret = fn(ti, sc->stripe[i].dev,
			 sc->stripe[i].physical_start,
			 sc->stripe_width, data);
	} while (!ret && ++i < sc->stripes);

	return ret;
}

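/*
 * Advertise one chunk as the minimum preferred I/O size and one full
 * stripe (chunk_size * stripes) as the optimal size, so upper layers
 * can align and size requests to whole stripes.
 */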
static void stripe_io_hints(struct dm_target *ti,
			    struct queue_limits *limits)
{
	struct stripe_c *sc = ti->private;
	unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;

	blk_limits_io_min(limits, chunk_size);
	blk_limits_io_opt(limits, chunk_size * sc->stripes);
}

static struct target_type stripe_target = {
	.name = "striped",
	.version = {1, 6, 0},
	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
	.module = THIS_MODULE,
	.ctr = stripe_ctr,
	.dtr = stripe_dtr,
	.map = stripe_map,
	.end_io = stripe_end_io,
	.status = stripe_status,
	.iterate_devices = stripe_iterate_devices,
	.io_hints = stripe_io_hints,
	.direct_access = stripe_dax_direct_access,
	.dax_zero_page_range = stripe_dax_zero_page_range,
	.dax_recovery_write = stripe_dax_recovery_write,
};

int __init dm_stripe_init(void)
{
	int r;

	dm_stripe_wq = alloc_workqueue("dm_stripe_wq", 0, 0);
	if (!dm_stripe_wq)
		return -ENOMEM;
	r = dm_register_target(&stripe_target);
	if (r < 0) {
		destroy_workqueue(dm_stripe_wq);
		DMWARN("target registration failed");
	}

	return r;
}

void dm_stripe_exit(void)
{
	dm_unregister_target(&stripe_target);
	destroy_workqueue(dm_stripe_wq);
}