1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
4 | * Copyright (C) 2006-2008 Red Hat GmbH |
5 | * |
6 | * This file is released under the GPL. |
7 | */ |
8 | |
9 | #include "dm-exception-store.h" |
10 | |
11 | #include <linux/ctype.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/pagemap.h> |
14 | #include <linux/vmalloc.h> |
15 | #include <linux/export.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/dm-io.h> |
18 | #include <linux/dm-bufio.h> |
19 | |
20 | #define DM_MSG_PREFIX "persistent snapshot" |
21 | #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U /* 16KB */ |
22 | |
23 | #define DM_PREFETCH_CHUNKS 12 |
24 | |
25 | /* |
26 | *--------------------------------------------------------------- |
 * Persistent snapshots: by "persistent" we mean that the snapshot
 * will survive a reboot.
29 | *--------------------------------------------------------------- |
30 | */ |
31 | |
32 | /* |
33 | * We need to store a record of which parts of the origin have |
34 | * been copied to the snapshot device. The snapshot code |
35 | * requires that we copy exception chunks to chunk aligned areas |
 * of the COW store. It therefore makes sense to store the
37 | * metadata in chunk size blocks. |
38 | * |
39 | * There is no backward or forward compatibility implemented, |
40 | * snapshots with different disk versions than the kernel will |
41 | * not be usable. It is expected that "lvcreate" will blank out |
42 | * the start of a fresh COW device before calling the snapshot |
43 | * constructor. |
44 | * |
45 | * The first chunk of the COW device just contains the header. |
46 | * After this there is a chunk filled with exception metadata, |
47 | * followed by as many exception chunks as can fit in the |
48 | * metadata areas. |
49 | * |
50 | * All on disk structures are in little-endian format. The end |
51 | * of the exceptions info is indicated by an exception with a |
52 | * new_chunk of 0, which is invalid since it would point to the |
53 | * header chunk. |
54 | */ |
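
/*
 * For example, with the default 16KB chunk size each metadata area
 * holds 16384 / sizeof(struct disk_exception) = 1024 entries, so the
 * COW device is laid out as:
 *
 *	chunk 0		header
 *	chunk 1		metadata area 0
 *	chunks 2-1025	data chunks for area 0
 *	chunk 1026	metadata area 1
 *	...
 */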
55 | |
56 | /* |
 * Magic for persistent snapshots: "SnAp" - Feeble, isn't it?
58 | */ |
59 | #define SNAP_MAGIC 0x70416e53 |
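
/*
 * Stored little-endian, the magic appears on disk as the bytes
 * 0x53 0x6e 0x41 0x70, i.e. "SnAp".
 */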
60 | |
61 | /* |
62 | * The on-disk version of the metadata. |
63 | */ |
64 | #define SNAPSHOT_DISK_VERSION 1 |
65 | |
66 | #define NUM_SNAPSHOT_HDR_CHUNKS 1 |
67 | |
struct disk_header {
	__le32 magic;

	/*
	 * Is this snapshot valid?  There is no way of recovering
	 * an invalid snapshot.
	 */
	__le32 valid;

	/*
	 * Simple, incrementing version; no backward
	 * compatibility.
	 */
	__le32 version;

	/* In sectors */
	__le32 chunk_size;
} __packed;
86 | |
87 | struct disk_exception { |
88 | __le64 old_chunk; |
89 | __le64 new_chunk; |
90 | } __packed; |
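
/*
 * A packed entry is 16 bytes; persistent_read_metadata() derives
 * ps->exceptions_per_area from this and the chunk size.
 */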
91 | |
92 | struct core_exception { |
93 | uint64_t old_chunk; |
94 | uint64_t new_chunk; |
95 | }; |
96 | |
97 | struct commit_callback { |
98 | void (*callback)(void *ref, int success); |
99 | void *context; |
100 | }; |
101 | |
102 | /* |
103 | * The top level structure for a persistent exception store. |
104 | */ |
105 | struct pstore { |
106 | struct dm_exception_store *store; |
107 | int version; |
108 | int valid; |
109 | uint32_t exceptions_per_area; |
110 | |
111 | /* |
112 | * Now that we have an asynchronous kcopyd there is no |
	 * need for large chunk sizes, so it won't hurt to have a
	 * whole chunk's worth of metadata in memory at once.
115 | */ |
116 | void *area; |
117 | |
118 | /* |
119 | * An area of zeros used to clear the next area. |
120 | */ |
121 | void *zero_area; |
122 | |
123 | /* |
124 | * An area used for header. The header can be written |
125 | * concurrently with metadata (when invalidating the snapshot), |
126 | * so it needs a separate buffer. |
127 | */ |
	void *header_area;
129 | |
130 | /* |
131 | * Used to keep track of which metadata area the data in |
132 | * 'chunk' refers to. |
133 | */ |
134 | chunk_t current_area; |
135 | |
136 | /* |
137 | * The next free chunk for an exception. |
138 | * |
139 | * When creating exceptions, all the chunks here and above are |
140 | * free. It holds the next chunk to be allocated. On rare |
141 | * occasions (e.g. after a system crash) holes can be left in |
142 | * the exception store because chunks can be committed out of |
143 | * order. |
144 | * |
145 | * When merging exceptions, it does not necessarily mean all the |
146 | * chunks here and above are free. It holds the value it would |
147 | * have held if all chunks had been committed in order of |
148 | * allocation. Consequently the value may occasionally be |
149 | * slightly too low, but since it's only used for 'status' and |
150 | * it can never reach its minimum value too early this doesn't |
151 | * matter. |
152 | */ |
	chunk_t next_free;
155 | |
156 | /* |
157 | * The index of next free exception in the current |
158 | * metadata area. |
159 | */ |
160 | uint32_t current_committed; |
161 | |
162 | atomic_t pending_count; |
163 | uint32_t callback_count; |
164 | struct commit_callback *callbacks; |
165 | struct dm_io_client *io_client; |
166 | |
167 | struct workqueue_struct *metadata_wq; |
168 | }; |
169 | |
170 | static int alloc_area(struct pstore *ps) |
171 | { |
172 | int r = -ENOMEM; |
173 | size_t len; |
174 | |
175 | len = ps->store->chunk_size << SECTOR_SHIFT; |
176 | |
177 | /* |
178 | * Allocate the chunk_size block of memory that will hold |
179 | * a single metadata area. |
180 | */ |
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vzalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);
200 | |
201 | err_area: |
202 | return r; |
203 | } |
204 | |
205 | static void free_area(struct pstore *ps) |
206 | { |
	vfree(ps->area);
	ps->area = NULL;
	vfree(ps->zero_area);
	ps->zero_area = NULL;
	vfree(ps->header_area);
212 | ps->header_area = NULL; |
213 | } |
214 | |
215 | struct mdata_req { |
216 | struct dm_io_region *where; |
217 | struct dm_io_request *io_req; |
218 | struct work_struct work; |
219 | int result; |
220 | }; |
221 | |
222 | static void do_metadata(struct work_struct *work) |
223 | { |
224 | struct mdata_req *req = container_of(work, struct mdata_req, work); |
225 | |
	req->result = dm_io(req->io_req, 1, req->where, NULL);
227 | } |
228 | |
229 | /* |
230 | * Read or write a chunk aligned and sized block of data from a device. |
231 | */ |
232 | static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, |
233 | int metadata) |
234 | { |
235 | struct dm_io_region where = { |
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
237 | .sector = ps->store->chunk_size * chunk, |
238 | .count = ps->store->chunk_size, |
239 | }; |
240 | struct dm_io_request io_req = { |
241 | .bi_opf = opf, |
242 | .mem.type = DM_IO_VMA, |
243 | .mem.ptr.vma = area, |
244 | .client = ps->io_client, |
245 | .notify.fn = NULL, |
246 | }; |
247 | struct mdata_req req; |
248 | |
249 | if (!metadata) |
		return dm_io(&io_req, 1, &where, NULL);
251 | |
252 | req.where = &where; |
253 | req.io_req = &io_req; |
254 | |
255 | /* |
256 | * Issue the synchronous I/O from a different thread |
257 | * to avoid submit_bio_noacct recursion. |
258 | */ |
259 | INIT_WORK_ONSTACK(&req.work, do_metadata); |
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);
263 | |
264 | return req.result; |
265 | } |
266 | |
267 | /* |
268 | * Convert a metadata area index to a chunk index. |
269 | */ |
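/*
 * E.g. with 16KB chunks the stride is 1024 + 1, so area 0 is chunk 1,
 * area 1 is chunk 1026 and area 2 is chunk 2051.
 */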
270 | static chunk_t area_location(struct pstore *ps, chunk_t area) |
271 | { |
272 | return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area); |
273 | } |
274 | |
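/*
 * If ps->next_free has landed on a metadata chunk, step over it.
 * Metadata chunks sit at offset NUM_SNAPSHOT_HDR_CHUNKS within each
 * stride: e.g. with 16KB chunks, reaching chunk 1026 (metadata area 1)
 * bumps next_free to 1027.
 */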
275 | static void skip_metadata(struct pstore *ps) |
276 | { |
277 | uint32_t stride = ps->exceptions_per_area + 1; |
278 | chunk_t next_free = ps->next_free; |
279 | |
280 | if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS) |
281 | ps->next_free++; |
282 | } |
283 | |
284 | /* |
285 | * Read or write a metadata area. Remembering to skip the first |
286 | * chunk which holds the header. |
287 | */ |
288 | static int area_io(struct pstore *ps, blk_opf_t opf) |
289 | { |
	chunk_t chunk = area_location(ps, ps->current_area);

	return chunk_io(ps, ps->area, chunk, opf, 0);
293 | } |
294 | |
295 | static void zero_memory_area(struct pstore *ps) |
296 | { |
297 | memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); |
298 | } |
299 | |
300 | static int zero_disk_area(struct pstore *ps, chunk_t area) |
301 | { |
	return chunk_io(ps, ps->zero_area, area_location(ps, area),
			REQ_OP_WRITE, 0);
304 | } |
305 | |
static int read_header(struct pstore *ps, int *new_snapshot)
307 | { |
308 | int r; |
309 | struct disk_header *dh; |
310 | unsigned int chunk_size; |
311 | int chunk_size_supplied = 1; |
312 | char *chunk_err; |
313 | |
314 | /* |
315 | * Use default chunk size (or logical_block_size, if larger) |
316 | * if none supplied |
317 | */ |
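	/*
	 * bdev_logical_block_size() returns bytes; ">> 9" converts that
	 * to 512-byte sectors for the max() comparison below.
	 */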
318 | if (!ps->store->chunk_size) { |
319 | ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, |
320 | bdev_logical_block_size(dm_snap_cow(ps->store->snap)-> |
321 | bdev) >> 9); |
322 | ps->store->chunk_mask = ps->store->chunk_size - 1; |
323 | ps->store->chunk_shift = __ffs(ps->store->chunk_size); |
324 | chunk_size_supplied = 0; |
325 | } |
326 | |
327 | ps->io_client = dm_io_client_create(); |
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);
330 | |
331 | r = alloc_area(ps); |
332 | if (r) |
333 | return r; |
334 | |
	r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 1);
336 | if (r) |
337 | goto bad; |
338 | |
339 | dh = ps->header_area; |
340 | |
341 | if (le32_to_cpu(dh->magic) == 0) { |
342 | *new_snapshot = 1; |
343 | return 0; |
344 | } |
345 | |
346 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { |
		DMWARN("Invalid or corrupt snapshot");
348 | r = -ENXIO; |
349 | goto bad; |
350 | } |
351 | |
352 | *new_snapshot = 0; |
353 | ps->valid = le32_to_cpu(dh->valid); |
354 | ps->version = le32_to_cpu(dh->version); |
355 | chunk_size = le32_to_cpu(dh->chunk_size); |
356 | |
357 | if (ps->store->chunk_size == chunk_size) |
358 | return 0; |
359 | |
360 | if (chunk_size_supplied) |
		DMWARN("chunk size %u in device metadata overrides table chunk size of %u.",
362 | chunk_size, ps->store->chunk_size); |
363 | |
364 | /* We had a bogus chunk_size. Fix stuff up. */ |
365 | free_area(ps); |
366 | |
	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
369 | if (r) { |
		DMERR("invalid on-disk chunk size %u: %s.",
371 | chunk_size, chunk_err); |
372 | return r; |
373 | } |
374 | |
375 | r = alloc_area(ps); |
376 | return r; |
377 | |
378 | bad: |
379 | free_area(ps); |
380 | return r; |
381 | } |
382 | |
static int write_header(struct pstore *ps)
384 | { |
385 | struct disk_header *dh; |
386 | |
387 | memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT); |
388 | |
389 | dh = ps->header_area; |
390 | dh->magic = cpu_to_le32(SNAP_MAGIC); |
391 | dh->valid = cpu_to_le32(ps->valid); |
392 | dh->version = cpu_to_le32(ps->version); |
393 | dh->chunk_size = cpu_to_le32(ps->store->chunk_size); |
394 | |
	return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 1);
396 | } |
397 | |
398 | /* |
399 | * Access functions for the disk exceptions, these do the endian conversions. |
400 | */ |
401 | static struct disk_exception *get_exception(struct pstore *ps, void *ps_area, |
402 | uint32_t index) |
403 | { |
404 | BUG_ON(index >= ps->exceptions_per_area); |
405 | |
406 | return ((struct disk_exception *) ps_area) + index; |
407 | } |
408 | |
409 | static void read_exception(struct pstore *ps, void *ps_area, |
410 | uint32_t index, struct core_exception *result) |
411 | { |
412 | struct disk_exception *de = get_exception(ps, ps_area, index); |
413 | |
414 | /* copy it */ |
415 | result->old_chunk = le64_to_cpu(de->old_chunk); |
416 | result->new_chunk = le64_to_cpu(de->new_chunk); |
417 | } |
418 | |
419 | static void write_exception(struct pstore *ps, |
420 | uint32_t index, struct core_exception *e) |
421 | { |
	struct disk_exception *de = get_exception(ps, ps->area, index);
423 | |
424 | /* copy it */ |
425 | de->old_chunk = cpu_to_le64(e->old_chunk); |
426 | de->new_chunk = cpu_to_le64(e->new_chunk); |
427 | } |
428 | |
429 | static void clear_exception(struct pstore *ps, uint32_t index) |
430 | { |
	struct disk_exception *de = get_exception(ps, ps->area, index);
432 | |
433 | /* clear it */ |
434 | de->old_chunk = 0; |
435 | de->new_chunk = 0; |
436 | } |
437 | |
438 | /* |
439 | * Registers the exceptions that are present in the current area. |
440 | * 'full' is filled in to indicate if the area has been |
441 | * filled. |
442 | */ |
443 | static int insert_exceptions(struct pstore *ps, void *ps_area, |
444 | int (*callback)(void *callback_context, |
445 | chunk_t old, chunk_t new), |
446 | void *callback_context, |
447 | int *full) |
448 | { |
449 | int r; |
450 | unsigned int i; |
451 | struct core_exception e; |
452 | |
453 | /* presume the area is full */ |
454 | *full = 1; |
455 | |
456 | for (i = 0; i < ps->exceptions_per_area; i++) { |
		read_exception(ps, ps_area, i, &e);
458 | |
459 | /* |
		 * If the new_chunk is pointing at the start of
		 * the COW device, where the first metadata area
		 * is, we know that we've hit the end of the
		 * exceptions. Therefore the area is not full.
464 | */ |
465 | if (e.new_chunk == 0LL) { |
466 | ps->current_committed = i; |
467 | *full = 0; |
468 | break; |
469 | } |
470 | |
471 | /* |
472 | * Keep track of the start of the free chunks. |
473 | */ |
474 | if (ps->next_free <= e.new_chunk) |
475 | ps->next_free = e.new_chunk + 1; |
476 | |
477 | /* |
478 | * Otherwise we add the exception to the snapshot. |
479 | */ |
480 | r = callback(callback_context, e.old_chunk, e.new_chunk); |
481 | if (r) |
482 | return r; |
483 | } |
484 | |
485 | return 0; |
486 | } |
487 | |
488 | static int read_exceptions(struct pstore *ps, |
489 | int (*callback)(void *callback_context, chunk_t old, |
490 | chunk_t new), |
491 | void *callback_context) |
492 | { |
493 | int r, full = 1; |
494 | struct dm_bufio_client *client; |
495 | chunk_t prefetch_area = 0; |
496 | |
	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
					ps->store->chunk_size << SECTOR_SHIFT,
					1, 0, NULL, NULL, 0);
500 | |
	if (IS_ERR(client))
		return PTR_ERR(client);
503 | |
504 | /* |
505 | * Setup for one current buffer + desired readahead buffers. |
506 | */ |
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);
508 | |
509 | /* |
	 * Keep reading chunks and inserting exceptions until
511 | * we find a partially full area. |
512 | */ |
513 | for (ps->current_area = 0; full; ps->current_area++) { |
514 | struct dm_buffer *bp; |
515 | void *area; |
516 | chunk_t chunk; |
517 | |
518 | if (unlikely(prefetch_area < ps->current_area)) |
519 | prefetch_area = ps->current_area; |
520 | |
521 | if (DM_PREFETCH_CHUNKS) { |
522 | do { |
				chunk_t pf_chunk = area_location(ps, prefetch_area);

				if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
					break;
				dm_bufio_prefetch(client, pf_chunk, 1);
528 | prefetch_area++; |
529 | if (unlikely(!prefetch_area)) |
530 | break; |
531 | } while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS); |
532 | } |
533 | |
		chunk = area_location(ps, ps->current_area);
535 | |
		area = dm_bufio_read(client, chunk, &bp);
		if (IS_ERR(area)) {
			r = PTR_ERR(area);
539 | goto ret_destroy_bufio; |
540 | } |
541 | |
		r = insert_exceptions(ps, area, callback, callback_context,
				      &full);
544 | |
545 | if (!full) |
546 | memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT); |
547 | |
		dm_bufio_release(bp);

		dm_bufio_forget(client, chunk);
551 | |
552 | if (unlikely(r)) |
553 | goto ret_destroy_bufio; |
554 | } |
555 | |
556 | ps->current_area--; |
557 | |
558 | skip_metadata(ps); |
559 | |
560 | r = 0; |
561 | |
562 | ret_destroy_bufio: |
	dm_bufio_client_destroy(client);
564 | |
565 | return r; |
566 | } |
567 | |
568 | static struct pstore *get_info(struct dm_exception_store *store) |
569 | { |
570 | return store->context; |
571 | } |
572 | |
573 | static void persistent_usage(struct dm_exception_store *store, |
574 | sector_t *total_sectors, |
575 | sector_t *sectors_allocated, |
576 | sector_t *metadata_sectors) |
577 | { |
578 | struct pstore *ps = get_info(store); |
579 | |
580 | *sectors_allocated = ps->next_free * store->chunk_size; |
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
582 | |
583 | /* |
584 | * First chunk is the fixed header. |
585 | * Then there are (ps->current_area + 1) metadata chunks, each one |
586 | * separated from the next by ps->exceptions_per_area data chunks. |
587 | */ |
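	/*
	 * E.g. a fresh snapshot (ps->current_area == 0) reports two chunks
	 * of metadata: the header plus the first, still-empty area.
	 */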
588 | *metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) * |
589 | store->chunk_size; |
590 | } |
591 | |
592 | static void persistent_dtr(struct dm_exception_store *store) |
593 | { |
594 | struct pstore *ps = get_info(store); |
595 | |
	destroy_workqueue(ps->metadata_wq);

	/* Created in read_header */
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);

	/* Allocated in persistent_read_metadata */
	kvfree(ps->callbacks);

	kfree(ps);
607 | } |
608 | |
609 | static int persistent_read_metadata(struct dm_exception_store *store, |
610 | int (*callback)(void *callback_context, |
611 | chunk_t old, chunk_t new), |
612 | void *callback_context) |
613 | { |
614 | int r, new_snapshot; |
615 | struct pstore *ps = get_info(store); |
616 | |
617 | /* |
618 | * Read the snapshot header. |
619 | */ |
	r = read_header(ps, &new_snapshot);
621 | if (r) |
622 | return r; |
623 | |
624 | /* |
625 | * Now we know correct chunk_size, complete the initialisation. |
626 | */ |
627 | ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / |
628 | sizeof(struct disk_exception); |
	ps->callbacks = kvcalloc(ps->exceptions_per_area,
				 sizeof(*ps->callbacks), GFP_KERNEL);
631 | if (!ps->callbacks) |
632 | return -ENOMEM; |
633 | |
634 | /* |
635 | * Do we need to setup a new snapshot ? |
636 | */ |
637 | if (new_snapshot) { |
638 | r = write_header(ps); |
639 | if (r) { |
			DMWARN("write_header failed");
641 | return r; |
642 | } |
643 | |
644 | ps->current_area = 0; |
645 | zero_memory_area(ps); |
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
649 | return r; |
650 | } |
651 | /* |
652 | * Sanity checks. |
653 | */ |
654 | if (ps->version != SNAPSHOT_DISK_VERSION) { |
		DMWARN("unable to handle snapshot disk version %d",
656 | ps->version); |
657 | return -EINVAL; |
658 | } |
659 | |
660 | /* |
661 | * Metadata are valid, but snapshot is invalidated |
662 | */ |
663 | if (!ps->valid) |
664 | return 1; |
665 | |
666 | /* |
667 | * Read the metadata. |
668 | */ |
669 | r = read_exceptions(ps, callback, callback_context); |
670 | |
671 | return r; |
672 | } |
673 | |
674 | static int persistent_prepare_exception(struct dm_exception_store *store, |
675 | struct dm_exception *e) |
676 | { |
677 | struct pstore *ps = get_info(store); |
	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
679 | |
680 | /* Is there enough room ? */ |
681 | if (size < ((ps->next_free + 1) * store->chunk_size)) |
682 | return -ENOSPC; |
683 | |
684 | e->new_chunk = ps->next_free; |
685 | |
686 | /* |
	 * Move on to the next free pending exception, making sure to take
688 | * into account the location of the metadata chunks. |
689 | */ |
690 | ps->next_free++; |
691 | skip_metadata(ps); |
692 | |
	atomic_inc(&ps->pending_count);
694 | return 0; |
695 | } |
696 | |
697 | static void persistent_commit_exception(struct dm_exception_store *store, |
698 | struct dm_exception *e, int valid, |
699 | void (*callback)(void *, int success), |
700 | void *callback_context) |
701 | { |
702 | unsigned int i; |
703 | struct pstore *ps = get_info(store); |
704 | struct core_exception ce; |
705 | struct commit_callback *cb; |
706 | |
707 | if (!valid) |
708 | ps->valid = 0; |
709 | |
710 | ce.old_chunk = e->old_chunk; |
711 | ce.new_chunk = e->new_chunk; |
	write_exception(ps, ps->current_committed++, &ce);
713 | |
714 | /* |
715 | * Add the callback to the back of the array. This code |
716 | * is the only place where the callback array is |
717 | * manipulated, and we know that it will never be called |
718 | * multiple times concurrently. |
719 | */ |
720 | cb = ps->callbacks + ps->callback_count++; |
721 | cb->callback = callback; |
722 | cb->context = callback_context; |
723 | |
724 | /* |
725 | * If there are exceptions in flight and we have not yet |
726 | * filled this metadata area there's nothing more to do. |
727 | */ |
	if (!atomic_dec_and_test(&ps->pending_count) &&
729 | (ps->current_committed != ps->exceptions_per_area)) |
730 | return; |
731 | |
732 | /* |
733 | * If we completely filled the current area, then wipe the next one. |
734 | */ |
735 | if ((ps->current_committed == ps->exceptions_per_area) && |
	    zero_disk_area(ps, ps->current_area + 1))
737 | ps->valid = 0; |
738 | |
739 | /* |
740 | * Commit exceptions to disk. |
741 | */ |
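	/*
	 * REQ_PREFLUSH flushes the data chunks already written to the COW
	 * device before this metadata write; REQ_FUA forces the metadata
	 * itself to stable storage, so the callbacks below only report
	 * success once both are durable.
	 */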
	if (ps->valid && area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA |
				 REQ_SYNC))
744 | ps->valid = 0; |
745 | |
746 | /* |
747 | * Advance to the next area if this one is full. |
748 | */ |
749 | if (ps->current_committed == ps->exceptions_per_area) { |
750 | ps->current_committed = 0; |
751 | ps->current_area++; |
752 | zero_memory_area(ps); |
753 | } |
754 | |
755 | for (i = 0; i < ps->callback_count; i++) { |
756 | cb = ps->callbacks + i; |
757 | cb->callback(cb->context, ps->valid); |
758 | } |
759 | |
760 | ps->callback_count = 0; |
761 | } |
762 | |
763 | static int persistent_prepare_merge(struct dm_exception_store *store, |
764 | chunk_t *last_old_chunk, |
765 | chunk_t *last_new_chunk) |
766 | { |
767 | struct pstore *ps = get_info(store); |
768 | struct core_exception ce; |
769 | int nr_consecutive; |
770 | int r; |
771 | |
772 | /* |
773 | * When current area is empty, move back to preceding area. |
774 | */ |
775 | if (!ps->current_committed) { |
776 | /* |
777 | * Have we finished? |
778 | */ |
779 | if (!ps->current_area) |
780 | return 0; |
781 | |
782 | ps->current_area--; |
		r = area_io(ps, REQ_OP_READ);
784 | if (r < 0) |
785 | return r; |
786 | ps->current_committed = ps->exceptions_per_area; |
787 | } |
788 | |
	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
790 | *last_old_chunk = ce.old_chunk; |
791 | *last_new_chunk = ce.new_chunk; |
792 | |
793 | /* |
794 | * Find number of consecutive chunks within the current area, |
795 | * working backwards. |
796 | */ |
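	/*
	 * E.g. if the most recently committed entries are (10,5) (11,6)
	 * (12,7), this returns 3 with *last_old_chunk == 12 and
	 * *last_new_chunk == 7.
	 */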
797 | for (nr_consecutive = 1; nr_consecutive < ps->current_committed; |
798 | nr_consecutive++) { |
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive, &ce);
801 | if (ce.old_chunk != *last_old_chunk - nr_consecutive || |
802 | ce.new_chunk != *last_new_chunk - nr_consecutive) |
803 | break; |
804 | } |
805 | |
806 | return nr_consecutive; |
807 | } |
808 | |
809 | static int persistent_commit_merge(struct dm_exception_store *store, |
810 | int nr_merged) |
811 | { |
812 | int r, i; |
813 | struct pstore *ps = get_info(store); |
814 | |
815 | BUG_ON(nr_merged > ps->current_committed); |
816 | |
817 | for (i = 0; i < nr_merged; i++) |
		clear_exception(ps, ps->current_committed - 1 - i);
819 | |
	r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA);
821 | if (r < 0) |
822 | return r; |
823 | |
824 | ps->current_committed -= nr_merged; |
825 | |
826 | /* |
827 | * At this stage, only persistent_usage() uses ps->next_free, so |
828 | * we make no attempt to keep ps->next_free strictly accurate |
829 | * as exceptions may have been committed out-of-order originally. |
830 | * Once a snapshot has become merging, we set it to the value it |
831 | * would have held had all the exceptions been committed in order. |
832 | * |
833 | * ps->current_area does not get reduced by prepare_merge() until |
834 | * after commit_merge() has removed the nr_merged previous exceptions. |
835 | */ |
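	/*
	 * E.g. with 16KB chunks, current_area == 1 and 100 exceptions still
	 * committed: area 1's metadata is chunk 1026, so next_free becomes
	 * 1026 + 100 + 1 = 1127, the first data chunk not yet consumed.
	 */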
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;
838 | |
839 | return 0; |
840 | } |
841 | |
842 | static void persistent_drop_snapshot(struct dm_exception_store *store) |
843 | { |
844 | struct pstore *ps = get_info(store); |
845 | |
846 | ps->valid = 0; |
847 | if (write_header(ps)) |
		DMWARN("write header failed");
849 | } |
850 | |
851 | static int persistent_ctr(struct dm_exception_store *store, char *options) |
852 | { |
853 | struct pstore *ps; |
854 | int r; |
855 | |
856 | /* allocate the pstore */ |
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
858 | if (!ps) |
859 | return -ENOMEM; |
860 | |
861 | ps->store = store; |
862 | ps->valid = 1; |
863 | ps->version = SNAPSHOT_DISK_VERSION; |
864 | ps->area = NULL; |
865 | ps->zero_area = NULL; |
866 | ps->header_area = NULL; |
867 | ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */ |
868 | ps->current_committed = 0; |
869 | |
870 | ps->callback_count = 0; |
	atomic_set(&ps->pending_count, 0);
872 | ps->callbacks = NULL; |
873 | |
	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
875 | if (!ps->metadata_wq) { |
		DMERR("couldn't start header metadata update thread");
877 | r = -ENOMEM; |
878 | goto err_workqueue; |
879 | } |
880 | |
881 | if (options) { |
882 | char overflow = toupper(options[0]); |
883 | |
884 | if (overflow == 'O') |
885 | store->userspace_supports_overflow = true; |
886 | else { |
			DMERR("Unsupported persistent store option: %s", options);
888 | r = -EINVAL; |
889 | goto err_options; |
890 | } |
891 | } |
892 | |
893 | store->context = ps; |
894 | |
895 | return 0; |
896 | |
err_options:
	destroy_workqueue(ps->metadata_wq);
err_workqueue:
	kfree(ps);
901 | |
902 | return r; |
903 | } |
904 | |
905 | static unsigned int persistent_status(struct dm_exception_store *store, |
906 | status_type_t status, char *result, |
907 | unsigned int maxlen) |
908 | { |
909 | unsigned int sz = 0; |
910 | |
911 | switch (status) { |
912 | case STATUSTYPE_INFO: |
913 | break; |
914 | case STATUSTYPE_TABLE: |
		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
916 | (unsigned long long)store->chunk_size); |
917 | break; |
918 | case STATUSTYPE_IMA: |
919 | *result = '\0'; |
920 | break; |
921 | } |
922 | |
923 | return sz; |
924 | } |
925 | |
926 | static struct dm_exception_store_type _persistent_type = { |
	.name = "persistent",
928 | .module = THIS_MODULE, |
929 | .ctr = persistent_ctr, |
930 | .dtr = persistent_dtr, |
931 | .read_metadata = persistent_read_metadata, |
932 | .prepare_exception = persistent_prepare_exception, |
933 | .commit_exception = persistent_commit_exception, |
934 | .prepare_merge = persistent_prepare_merge, |
935 | .commit_merge = persistent_commit_merge, |
936 | .drop_snapshot = persistent_drop_snapshot, |
937 | .usage = persistent_usage, |
938 | .status = persistent_status, |
939 | }; |
940 | |
941 | static struct dm_exception_store_type _persistent_compat_type = { |
	.name = "P",
943 | .module = THIS_MODULE, |
944 | .ctr = persistent_ctr, |
945 | .dtr = persistent_dtr, |
946 | .read_metadata = persistent_read_metadata, |
947 | .prepare_exception = persistent_prepare_exception, |
948 | .commit_exception = persistent_commit_exception, |
949 | .prepare_merge = persistent_prepare_merge, |
950 | .commit_merge = persistent_commit_merge, |
951 | .drop_snapshot = persistent_drop_snapshot, |
952 | .usage = persistent_usage, |
953 | .status = persistent_status, |
954 | }; |
955 | |
956 | int dm_persistent_snapshot_init(void) |
957 | { |
958 | int r; |
959 | |
	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}
970 | return r; |
971 | } |
972 | |
973 | return r; |
974 | } |
975 | |
976 | void dm_persistent_snapshot_exit(void) |
977 | { |
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
980 | } |
981 | |