// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_peer_device *, int);
static int make_resync_request(struct drbd_peer_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = blk_status_to_errno(bio->bi_status);

	/* special case: drbd_md_read() during drbd_adm_attach() */
	if (device->ldev)
		put_ldev(device);
	bio_put(bio);

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage. */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	if (peer_req->flags & EE_WAS_ERROR) {
		/* In protocol != C, we usually do not send write acks.
		 * In case of a write error, send the neg ack anyways. */
		if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
			inc_unacked(device);
		drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
	}

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_OP_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
			  bio_op(bio) == REQ_OP_DISCARD;

	if (bio->bi_status && drbd_ratelimit())
		drbd_warn(device, "%s: error=%d s=%llus\n",
			is_write ? (is_discard ? "discard" : "write")
				 : "read", bio->bi_status,
			(unsigned long long)peer_req->i.sector);

	if (bio->bi_status)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

static void
drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (drbd_ratelimit())
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_status)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_status)) {
		switch (bio_op(bio)) {
		case REQ_OP_WRITE_ZEROES:
		case REQ_OP_DISCARD:
			if (bio->bi_status == BLK_STS_NOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_opf & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
	bio_put(bio);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, NULL, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

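/* Compute a digest over all pages of a peer request.  All pages of the
 * chain are fully used, except possibly the last one, which may be
 * partial. */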
void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;
	void *src;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	src = kmap_atomic(page);
	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		crypto_shash_update(desc, src, PAGE_SIZE);
		kunmap_atomic(src);
		page = tmp;
		src = kmap_atomic(page);
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	crypto_shash_update(desc, src, len ?: PAGE_SIZE);
	kunmap_atomic(src);

	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	bio_for_each_segment(bvec, bio, iter) {
		u8 *src;

		src = bvec_kmap_local(&bvec);
		crypto_shash_update(desc, src, bvec.bv_len);
		kunmap_local(src);
	}
	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(peer_device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

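/* Opportunistic allocation flags for background resync reads: do not warn
 * on failure; the request is simply deferred and retried later. */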
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	peer_req->opf = REQ_OP_READ;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(first_peer_device(device), cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(first_peer_device(device), cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = timer_container_of(device, t,
							resync_timer);

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

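/* Push @value into the ring buffer and return the value it displaces,
 * i.e. the oldest planned correction, which is due now. */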
static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

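/* Dynamic resync-rate controller: returns the number of sectors to request
 * from the peer during the next SLEEP_TIME (nominally 100ms) cycle.
 * "want" is the desired amount of in-flight resync data: either the fixed
 * c_fill_target, or derived from c_delay_target and the observed throughput
 * (sect_in per cycle).  The difference between "want" and what is currently
 * in flight (plus already planned corrections) is spread evenly over the
 * "steps" slots of the plan FIFO, so the rate converges over c_plan_ahead
 * cycles instead of oscillating.  The result is clamped between 0 and the
 * per-cycle equivalent of c_max_rate. */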
static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in)
{
	struct drbd_device *device = peer_device->device;
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in-flight */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in-flight */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel)
{
	struct drbd_device *const device = peer_device->device;
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(peer_device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(peer_device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(peer_device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* always keep larger requests naturally aligned:
			 * stop merging once the start sector is no longer
			 * aligned to the next power-of-two request size;
			 * one 4k resync block spans 8 sectors, hence the
			 * "+3" in the mask */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(peer_device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(peer_device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
{
	struct drbd_device *device = peer_device->device;
	int number, i, size;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(peer_device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(peer_device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(peer_device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(peer_device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(first_peer_device(device));
	drbd_resync_finished(first_peer_device(device));

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(first_peer_device(device));

	return 0;
}

static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
				(t < 100000) ? ((s*100)/t) : (s/(t/100));
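			/* the second form avoids overflowing s*100 for large t */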
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, " %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		err = 0;
		goto out;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
out:
	dec_unacked(device);
	drbd_free_peer_req(device, peer_req);

	return err;
}

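/* Scan the payload word by word for non-zero data; used to decide whether
 * a thin resync reply can be sent as P_RS_DEALLOCATED instead of the full
 * data block. */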
static bool all_zero(struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned int len = peer_req->i.size;

	page_chain_for_each(page) {
		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
		unsigned int i, words = l / sizeof(long);
		unsigned long *d;

		d = kmap_atomic(page);
		for (i = 0; i < words; i++) {
			if (d[i]) {
				kunmap_atomic(d);
				return false;
			}
		}
		kunmap_atomic(d);
		len -= l;
	}

	return true;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		err = 0;
		goto out;
	}

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(peer_device);
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			if (drbd_ratelimit())
				drbd_err(device, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size);
	}
	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
out:
	dec_unacked(device);
	drbd_free_peer_req(device, peer_req);

	return err;
}

int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		err = 0;
		goto out;
	}

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (peer_device->connection->csums_tfm) {
			digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
			D_ASSERT(device, digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		if (digest) {
			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(peer_device);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
	}
	if (unlikely(err))
		drbd_err(device, "drbd_send_block/ack() failed\n");
out:
	dec_unacked(device);
	drbd_free_peer_req(device, peer_req);

	return err;
}

int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	peer_req = NULL;
	inc_rs_pending(peer_device);
	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(peer_device);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);
	dec_unacked(device);
	return err;
}

void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
		device->ov_last_oos_size += size>>9;
	} else {
		device->ov_last_oos_start = sector;
		device->ov_last_oos_size = size>>9;
	}
	drbd_set_out_of_sync(peer_device, sector, size);
}

int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;
	bool stop_sector_reached = false;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);

			D_ASSERT(device, digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(peer_device, sector, size);
	else
		ov_out_of_sync_print(peer_device);

	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(peer_device, device->ov_left);

	stop_sector_reached = verify_can_do_stop_sector(device) &&
		(sector + (size>>9)) >= device->ov_stop_sector;

	if (device->ov_left == 0 || stop_sector_reached) {
		ov_out_of_sync_print(peer_device);
		drbd_resync_finished(peer_device);
	}

	return err;
}

/* FIXME
 * We need to track the number of pending barrier acks,
 * and to be able to wait for them.
 * See also comment in drbd_adm_attach before drbd_suspend_io.
 */
static int drbd_send_barrier(struct drbd_connection *connection)
{
	struct p_barrier *p;
	struct drbd_socket *sock;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	p->barrier = connection->send.current_epoch_nr;
	p->pad = 0;
	connection->send.current_epoch_writes = 0;
	connection->send.last_sent_barrier_jif = jiffies;

	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}

static int pd_send_unplug_remote(struct drbd_peer_device *pd)
{
	struct drbd_socket *sock = &pd->connection->data;
	if (!drbd_prepare_command(pd, sock))
		return -EIO;
	return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
}

int w_send_write_hint(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, unplug_work);

	if (cancel)
		return 0;
	return pd_send_unplug_remote(first_peer_device(device));
}

static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
{
	if (!connection->send.seen_any_write_yet) {
		connection->send.seen_any_write_yet = true;
		connection->send.current_epoch_nr = epoch;
		connection->send.current_epoch_writes = 0;
		connection->send.last_sent_barrier_jif = jiffies;
	}
}

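/* If @epoch differs from the epoch of the last write we sent, the previous
 * epoch is complete: close it with a P_BARRIER first, but only if it
 * actually carried any writes. */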
| 1393 | static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) |
| 1394 | { |
| 1395 | /* re-init if first write on this connection */ |
| 1396 | if (!connection->send.seen_any_write_yet) |
| 1397 | return; |
| 1398 | if (connection->send.current_epoch_nr != epoch) { |
| 1399 | if (connection->send.current_epoch_writes) |
| 1400 | drbd_send_barrier(connection); |
| 1401 | connection->send.current_epoch_nr = epoch; |
| 1402 | } |
| 1403 | } |
| 1404 | |
| 1405 | int w_send_out_of_sync(struct drbd_work *w, int cancel) |
| 1406 | { |
| 1407 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
| 1408 | struct drbd_device *device = req->device; |
| 1409 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
| 1410 | struct drbd_connection *const connection = peer_device->connection; |
| 1411 | int err; |
| 1412 | |
| 1413 | if (unlikely(cancel)) { |
| 1414 | req_mod(req, what: SEND_CANCELED, peer_device); |
| 1415 | return 0; |
| 1416 | } |
| 1417 | req->pre_send_jif = jiffies; |
| 1418 | |
| 1419 | /* this time, no connection->send.current_epoch_writes++; |
| 1420 | * If it was sent, it was the closing barrier for the last |
| 1421 | * replicated epoch, before we went into AHEAD mode. |
| 1422 | * No more barriers will be sent, until we leave AHEAD mode again. */ |
| 1423 | maybe_send_barrier(connection, epoch: req->epoch); |
| 1424 | |
| 1425 | err = drbd_send_out_of_sync(peer_device, req); |
| 1426 | req_mod(req, what: OOS_HANDED_TO_NETWORK, peer_device); |
| 1427 | |
| 1428 | return err; |
| 1429 | } |
| 1430 | |
| 1431 | /** |
| 1432 | * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request |
| 1433 | * @w: work object. |
| 1434 | * @cancel: The connection will be closed anyways |
| 1435 | */ |
| 1436 | int w_send_dblock(struct drbd_work *w, int cancel) |
| 1437 | { |
| 1438 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
| 1439 | struct drbd_device *device = req->device; |
| 1440 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
| 1441 | struct drbd_connection *connection = peer_device->connection; |
| 1442 | bool do_send_unplug = req->rq_state & RQ_UNPLUG; |
| 1443 | int err; |
| 1444 | |
| 1445 | if (unlikely(cancel)) { |
| 1446 | req_mod(req, what: SEND_CANCELED, peer_device); |
| 1447 | return 0; |
| 1448 | } |
| 1449 | req->pre_send_jif = jiffies; |
| 1450 | |
| 1451 | re_init_if_first_write(connection, epoch: req->epoch); |
| 1452 | maybe_send_barrier(connection, epoch: req->epoch); |
| 1453 | connection->send.current_epoch_writes++; |
| 1454 | |
| 1455 | err = drbd_send_dblock(peer_device, req); |
| 1456 | req_mod(req, what: err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); |
| 1457 | |
| 1458 | if (do_send_unplug && !err) |
| 1459 | pd_send_unplug_remote(pd: peer_device); |
| 1460 | |
| 1461 | return err; |
| 1462 | } |
| 1463 | |
| 1464 | /** |
| 1465 | * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet |
| 1466 | * @w: work object. |
| 1467 | * @cancel: The connection will be closed anyways |
| 1468 | */ |
| 1469 | int w_send_read_req(struct drbd_work *w, int cancel) |
| 1470 | { |
| 1471 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
| 1472 | struct drbd_device *device = req->device; |
| 1473 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
| 1474 | struct drbd_connection *connection = peer_device->connection; |
| 1475 | bool do_send_unplug = req->rq_state & RQ_UNPLUG; |
| 1476 | int err; |
| 1477 | |
| 1478 | if (unlikely(cancel)) { |
req_mod(req, SEND_CANCELED, peer_device);
| 1480 | return 0; |
| 1481 | } |
| 1482 | req->pre_send_jif = jiffies; |
| 1483 | |
| 1484 | /* Even read requests may close a write epoch, |
| 1485 | * if there was any yet. */ |
maybe_send_barrier(connection, req->epoch);
| 1487 | |
err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
(unsigned long)req);
| 1490 | |
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);
| 1492 | |
| 1493 | if (do_send_unplug && !err) |
pd_send_unplug_remote(peer_device);
| 1495 | |
| 1496 | return err; |
| 1497 | } |
| 1498 | |
| 1499 | int w_restart_disk_io(struct drbd_work *w, int cancel) |
| 1500 | { |
| 1501 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
| 1502 | struct drbd_device *device = req->device; |
| 1503 | |
| 1504 | if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) |
drbd_al_begin_io(device, &req->i);
| 1506 | |
req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
req->master_bio, GFP_NOIO,
&drbd_io_bio_set);
| 1510 | req->private_bio->bi_private = req; |
| 1511 | req->private_bio->bi_end_io = drbd_request_endio; |
submit_bio_noacct(req->private_bio);
| 1513 | |
| 1514 | return 0; |
| 1515 | } |
| 1516 | |
| 1517 | static int _drbd_may_sync_now(struct drbd_device *device) |
| 1518 | { |
| 1519 | struct drbd_device *odev = device; |
| 1520 | int resync_after; |
| 1521 | |
| 1522 | while (1) { |
| 1523 | if (!odev->ldev || odev->state.disk == D_DISKLESS) |
| 1524 | return 1; |
| 1525 | rcu_read_lock(); |
| 1526 | resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; |
| 1527 | rcu_read_unlock(); |
| 1528 | if (resync_after == -1) |
| 1529 | return 1; |
odev = minor_to_device(resync_after);
| 1531 | if (!odev) |
| 1532 | return 1; |
| 1533 | if ((odev->state.conn >= C_SYNC_SOURCE && |
| 1534 | odev->state.conn <= C_PAUSED_SYNC_T) || |
| 1535 | odev->state.aftr_isp || odev->state.peer_isp || |
| 1536 | odev->state.user_isp) |
| 1537 | return 0; |
| 1538 | } |
| 1539 | } |
| 1540 | |
| 1541 | /** |
| 1542 | * drbd_pause_after() - Pause resync on all devices that may not resync now |
| 1543 | * @device: DRBD device. |
| 1544 | * |
| 1545 | * Called from process context only (admin command and after_state_ch). |
| 1546 | */ |
| 1547 | static bool drbd_pause_after(struct drbd_device *device) |
| 1548 | { |
| 1549 | bool changed = false; |
| 1550 | struct drbd_device *odev; |
| 1551 | int i; |
| 1552 | |
| 1553 | rcu_read_lock(); |
| 1554 | idr_for_each_entry(&drbd_devices, odev, i) { |
| 1555 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) |
| 1556 | continue; |
if (!_drbd_may_sync_now(odev) &&
| 1558 | _drbd_set_state(_NS(odev, aftr_isp, 1), |
| 1559 | CS_HARD, NULL) != SS_NOTHING_TO_DO) |
| 1560 | changed = true; |
| 1561 | } |
| 1562 | rcu_read_unlock(); |
| 1563 | |
| 1564 | return changed; |
| 1565 | } |
| 1566 | |
| 1567 | /** |
| 1568 | * drbd_resume_next() - Resume resync on all devices that may resync now |
| 1569 | * @device: DRBD device. |
| 1570 | * |
| 1571 | * Called from process context only (admin command and worker). |
| 1572 | */ |
| 1573 | static bool drbd_resume_next(struct drbd_device *device) |
| 1574 | { |
| 1575 | bool changed = false; |
| 1576 | struct drbd_device *odev; |
| 1577 | int i; |
| 1578 | |
| 1579 | rcu_read_lock(); |
| 1580 | idr_for_each_entry(&drbd_devices, odev, i) { |
| 1581 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) |
| 1582 | continue; |
| 1583 | if (odev->state.aftr_isp) { |
if (_drbd_may_sync_now(odev) &&
| 1585 | _drbd_set_state(_NS(odev, aftr_isp, 0), |
| 1586 | CS_HARD, NULL) != SS_NOTHING_TO_DO) |
| 1587 | changed = true; |
| 1588 | } |
| 1589 | } |
| 1590 | rcu_read_unlock(); |
| 1591 | return changed; |
| 1592 | } |
| 1593 | |
| 1594 | void resume_next_sg(struct drbd_device *device) |
| 1595 | { |
| 1596 | lock_all_resources(); |
| 1597 | drbd_resume_next(device); |
| 1598 | unlock_all_resources(); |
| 1599 | } |
| 1600 | |
| 1601 | void suspend_other_sg(struct drbd_device *device) |
| 1602 | { |
| 1603 | lock_all_resources(); |
| 1604 | drbd_pause_after(device); |
| 1605 | unlock_all_resources(); |
| 1606 | } |
| 1607 | |
| 1608 | /* caller must lock_all_resources() */ |
| 1609 | enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) |
| 1610 | { |
| 1611 | struct drbd_device *odev; |
| 1612 | int resync_after; |
| 1613 | |
| 1614 | if (o_minor == -1) |
| 1615 | return NO_ERROR; |
| 1616 | if (o_minor < -1 || o_minor > MINORMASK) |
| 1617 | return ERR_RESYNC_AFTER; |
| 1618 | |
| 1619 | /* check for loops */ |
odev = minor_to_device(o_minor);
| 1621 | while (1) { |
| 1622 | if (odev == device) |
| 1623 | return ERR_RESYNC_AFTER_CYCLE; |
| 1624 | |
| 1625 | /* You are free to depend on diskless, non-existing, |
| 1626 | * or not yet/no longer existing minors. |
| 1627 | * We only reject dependency loops. |
| 1628 | * We cannot follow the dependency chain beyond a detached or |
| 1629 | * missing minor. |
| 1630 | */ |
| 1631 | if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS) |
| 1632 | return NO_ERROR; |
| 1633 | |
| 1634 | rcu_read_lock(); |
| 1635 | resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; |
| 1636 | rcu_read_unlock(); |
| 1637 | /* dependency chain ends here, no cycles. */ |
| 1638 | if (resync_after == -1) |
| 1639 | return NO_ERROR; |
| 1640 | |
| 1641 | /* follow the dependency chain */ |
odev = minor_to_device(resync_after);
| 1643 | } |
| 1644 | } |
| 1645 | |
| 1646 | /* caller must lock_all_resources() */ |
| 1647 | void drbd_resync_after_changed(struct drbd_device *device) |
| 1648 | { |
| 1649 | int changed; |
| 1650 | |
| 1651 | do { |
| 1652 | changed = drbd_pause_after(device); |
| 1653 | changed |= drbd_resume_next(device); |
| 1654 | } while (changed); |
| 1655 | } |
| 1656 | |
| 1657 | void drbd_rs_controller_reset(struct drbd_peer_device *peer_device) |
| 1658 | { |
| 1659 | struct drbd_device *device = peer_device->device; |
| 1660 | struct gendisk *disk = device->ldev->backing_bdev->bd_disk; |
| 1661 | struct fifo_buffer *plan; |
| 1662 | |
atomic_set(&device->rs_sect_in, 0);
atomic_set(&device->rs_sect_ev, 0);
| 1665 | device->rs_in_flight = 0; |
| 1666 | device->rs_last_events = |
| 1667 | (int)part_stat_read_accum(disk->part0, sectors); |
| 1668 | |
/* Updating the RCU protected object in place is necessary since
this function gets called from atomic context.
It is valid since all other updates also lead to a completely
empty fifo */
| 1673 | rcu_read_lock(); |
| 1674 | plan = rcu_dereference(device->rs_plan_s); |
| 1675 | plan->total = 0; |
fifo_set(plan, 0);
| 1677 | rcu_read_unlock(); |
| 1678 | } |
| 1679 | |
| 1680 | void start_resync_timer_fn(struct timer_list *t) |
| 1681 | { |
| 1682 | struct drbd_device *device = timer_container_of(device, t, |
| 1683 | start_resync_timer); |
drbd_device_post_work(device, RS_START);
| 1685 | } |
| 1686 | |
| 1687 | static void do_start_resync(struct drbd_device *device) |
| 1688 | { |
if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
drbd_warn(device, "postponing start_resync ...\n");
device->start_resync_timer.expires = jiffies + HZ/10;
add_timer(&device->start_resync_timer);
| 1693 | return; |
| 1694 | } |
| 1695 | |
drbd_start_resync(device, C_SYNC_SOURCE);
clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
| 1698 | } |
| 1699 | |
| 1700 | static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) |
| 1701 | { |
| 1702 | bool csums_after_crash_only; |
| 1703 | rcu_read_lock(); |
| 1704 | csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; |
| 1705 | rcu_read_unlock(); |
| 1706 | return connection->agreed_pro_version >= 89 && /* supported? */ |
| 1707 | connection->csums_tfm && /* configured? */ |
| 1708 | (csums_after_crash_only == false /* use for each resync? */ |
| 1709 | || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ |
| 1710 | } |
| 1711 | |
| 1712 | /** |
| 1713 | * drbd_start_resync() - Start the resync process |
| 1714 | * @device: DRBD device. |
| 1715 | * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET |
| 1716 | * |
| 1717 | * This function might bring you directly into one of the |
| 1718 | * C_PAUSED_SYNC_* states. |
| 1719 | */ |
| 1720 | void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) |
| 1721 | { |
| 1722 | struct drbd_peer_device *peer_device = first_peer_device(device); |
| 1723 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; |
| 1724 | union drbd_state ns; |
| 1725 | int r; |
| 1726 | |
| 1727 | if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { |
| 1728 | drbd_err(device, "Resync already running!\n" ); |
| 1729 | return; |
| 1730 | } |
| 1731 | |
| 1732 | if (!connection) { |
| 1733 | drbd_err(device, "No connection to peer, aborting!\n" ); |
| 1734 | return; |
| 1735 | } |
| 1736 | |
| 1737 | if (!test_bit(B_RS_H_DONE, &device->flags)) { |
| 1738 | if (side == C_SYNC_TARGET) { |
/* Since application IO was locked out during C_WF_BITMAP_T and
C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
we ask the handler whether we may make the data inconsistent. */
| 1742 | r = drbd_khelper(device, cmd: "before-resync-target" ); |
| 1743 | r = (r >> 8) & 0xff; |
| 1744 | if (r > 0) { |
| 1745 | drbd_info(device, "before-resync-target handler returned %d, " |
| 1746 | "dropping connection.\n" , r); |
| 1747 | conn_request_state(connection, NS(conn, C_DISCONNECTING), flags: CS_HARD); |
| 1748 | return; |
| 1749 | } |
| 1750 | } else /* C_SYNC_SOURCE */ { |
| 1751 | r = drbd_khelper(device, cmd: "before-resync-source" ); |
| 1752 | r = (r >> 8) & 0xff; |
| 1753 | if (r > 0) { |
| 1754 | if (r == 3) { |
| 1755 | drbd_info(device, "before-resync-source handler returned %d, " |
| 1756 | "ignoring. Old userland tools?" , r); |
| 1757 | } else { |
| 1758 | drbd_info(device, "before-resync-source handler returned %d, " |
| 1759 | "dropping connection.\n" , r); |
| 1760 | conn_request_state(connection, |
| 1761 | NS(conn, C_DISCONNECTING), flags: CS_HARD); |
| 1762 | return; |
| 1763 | } |
| 1764 | } |
| 1765 | } |
| 1766 | } |
| 1767 | |
| 1768 | if (current == connection->worker.task) { |
/* The worker should not sleep waiting for state_mutex,
as that could take a long time */
| 1771 | if (!mutex_trylock(device->state_mutex)) { |
set_bit(B_RS_H_DONE, &device->flags);
device->start_resync_timer.expires = jiffies + HZ/5;
add_timer(&device->start_resync_timer);
| 1775 | return; |
| 1776 | } |
| 1777 | } else { |
| 1778 | mutex_lock(device->state_mutex); |
| 1779 | } |
| 1780 | |
| 1781 | lock_all_resources(); |
clear_bit(B_RS_H_DONE, &device->flags);
| 1783 | /* Did some connection breakage or IO error race with us? */ |
| 1784 | if (device->state.conn < C_CONNECTED |
| 1785 | || !get_ldev_if_state(device, D_NEGOTIATING)) { |
| 1786 | unlock_all_resources(); |
| 1787 | goto out; |
| 1788 | } |
| 1789 | |
| 1790 | ns = drbd_read_state(device); |
| 1791 | |
| 1792 | ns.aftr_isp = !_drbd_may_sync_now(device); |
| 1793 | |
| 1794 | ns.conn = side; |
| 1795 | |
| 1796 | if (side == C_SYNC_TARGET) |
| 1797 | ns.disk = D_INCONSISTENT; |
| 1798 | else /* side == C_SYNC_SOURCE */ |
| 1799 | ns.pdsk = D_INCONSISTENT; |
| 1800 | |
| 1801 | r = _drbd_set_state(device, ns, CS_VERBOSE, NULL); |
| 1802 | ns = drbd_read_state(device); |
| 1803 | |
| 1804 | if (ns.conn < C_CONNECTED) |
| 1805 | r = SS_UNKNOWN_ERROR; |
| 1806 | |
| 1807 | if (r == SS_SUCCESS) { |
| 1808 | unsigned long tw = drbd_bm_total_weight(device); |
| 1809 | unsigned long now = jiffies; |
| 1810 | int i; |
| 1811 | |
| 1812 | device->rs_failed = 0; |
| 1813 | device->rs_paused = 0; |
| 1814 | device->rs_same_csum = 0; |
| 1815 | device->rs_last_sect_ev = 0; |
| 1816 | device->rs_total = tw; |
| 1817 | device->rs_start = now; |
| 1818 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { |
| 1819 | device->rs_mark_left[i] = tw; |
| 1820 | device->rs_mark_time[i] = now; |
| 1821 | } |
| 1822 | drbd_pause_after(device); |
| 1823 | /* Forget potentially stale cached per resync extent bit-counts. |
| 1824 | * Open coded drbd_rs_cancel_all(device), we already have IRQs |
| 1825 | * disabled, and know the disk state is ok. */ |
spin_lock(&device->al_lock);
lc_reset(device->resync);
device->resync_locked = 0;
device->resync_wenr = LC_FREE;
spin_unlock(&device->al_lock);
| 1831 | } |
| 1832 | unlock_all_resources(); |
| 1833 | |
| 1834 | if (r == SS_SUCCESS) { |
| 1835 | wake_up(&device->al_wait); /* for lc_reset() above */ |
| 1836 | /* reset rs_last_bcast when a resync or verify is started, |
| 1837 | * to deal with potential jiffies wrap. */ |
| 1838 | device->rs_last_bcast = jiffies - HZ; |
| 1839 | |
| 1840 | drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n" , |
| 1841 | drbd_conn_str(ns.conn), |
| 1842 | (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), |
| 1843 | (unsigned long) device->rs_total); |
| 1844 | if (side == C_SYNC_TARGET) { |
| 1845 | device->bm_resync_fo = 0; |
| 1846 | device->use_csums = use_checksum_based_resync(connection, device); |
| 1847 | } else { |
| 1848 | device->use_csums = false; |
| 1849 | } |
| 1850 | |
/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
* with w_send_oos, or the sync target will get confused as to
* how many bits to resync. We cannot always do that, because for an
* empty resync and protocol < 95, we need to do it here, as we call
* drbd_resync_finished from here in that case.
* We drbd_gen_and_send_sync_uuid here for protocol < 96,
* and from after_state_ch otherwise. */
| 1858 | if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) |
| 1859 | drbd_gen_and_send_sync_uuid(peer_device); |
| 1860 | |
| 1861 | if (connection->agreed_pro_version < 95 && device->rs_total == 0) { |
| 1862 | /* This still has a race (about when exactly the peers |
| 1863 | * detect connection loss) that can lead to a full sync |
| 1864 | * on next handshake. In 8.3.9 we fixed this with explicit |
| 1865 | * resync-finished notifications, but the fix |
| 1866 | * introduces a protocol change. Sleeping for some |
| 1867 | * time longer than the ping interval + timeout on the |
| 1868 | * SyncSource, to give the SyncTarget the chance to |
| 1869 | * detect connection loss, then waiting for a ping |
| 1870 | * response (implicit in drbd_resync_finished) reduces |
| 1871 | * the race considerably, but does not solve it. */ |
| 1872 | if (side == C_SYNC_SOURCE) { |
| 1873 | struct net_conf *nc; |
| 1874 | int timeo; |
| 1875 | |
| 1876 | rcu_read_lock(); |
| 1877 | nc = rcu_dereference(connection->net_conf); |
| 1878 | timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; |
| 1879 | rcu_read_unlock(); |
schedule_timeout_interruptible(timeo);
| 1881 | } |
| 1882 | drbd_resync_finished(peer_device); |
| 1883 | } |
| 1884 | |
| 1885 | drbd_rs_controller_reset(peer_device); |
| 1886 | /* ns.conn may already be != device->state.conn, |
| 1887 | * we may have been paused in between, or become paused until |
| 1888 | * the timer triggers. |
| 1889 | * No matter, that is handled in resync_timer_fn() */ |
| 1890 | if (ns.conn == C_SYNC_TARGET) |
mod_timer(&device->resync_timer, jiffies);
| 1892 | |
| 1893 | drbd_md_sync(device); |
| 1894 | } |
| 1895 | put_ldev(device); |
| 1896 | out: |
mutex_unlock(device->state_mutex);
| 1898 | } |
| 1899 | |
| 1900 | static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done) |
| 1901 | { |
| 1902 | struct drbd_device *device = peer_device->device; |
| 1903 | struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; |
| 1904 | device->rs_last_bcast = jiffies; |
| 1905 | |
| 1906 | if (!get_ldev(device)) |
| 1907 | return; |
| 1908 | |
drbd_bm_write_lazy(device, 0);
if (resync_done && is_sync_state(device->state.conn))
drbd_resync_finished(peer_device);

drbd_bcast_event(device, &sib);
| 1914 | /* update timestamp, in case it took a while to write out stuff */ |
| 1915 | device->rs_last_bcast = jiffies; |
| 1916 | put_ldev(device); |
| 1917 | } |
| 1918 | |
| 1919 | static void drbd_ldev_destroy(struct drbd_device *device) |
| 1920 | { |
lc_destroy(device->resync);
device->resync = NULL;
lc_destroy(device->act_log);
| 1924 | device->act_log = NULL; |
| 1925 | |
| 1926 | __acquire(local); |
drbd_backing_dev_free(device, device->ldev);
| 1928 | device->ldev = NULL; |
| 1929 | __release(local); |
| 1930 | |
clear_bit(GOING_DISKLESS, &device->flags);
| 1932 | wake_up(&device->misc_wait); |
| 1933 | } |
| 1934 | |
| 1935 | static void go_diskless(struct drbd_device *device) |
| 1936 | { |
| 1937 | struct drbd_peer_device *peer_device = first_peer_device(device); |
| 1938 | D_ASSERT(device, device->state.disk == D_FAILED); |
| 1939 | /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will |
| 1940 | * inc/dec it frequently. Once we are D_DISKLESS, no one will touch |
| 1941 | * the protected members anymore, though, so once put_ldev reaches zero |
| 1942 | * again, it will be safe to free them. */ |
| 1943 | |
| 1944 | /* Try to write changed bitmap pages, read errors may have just |
| 1945 | * set some bits outside the area covered by the activity log. |
| 1946 | * |
| 1947 | * If we have an IO error during the bitmap writeout, |
| 1948 | * we will want a full sync next time, just in case. |
| 1949 | * (Do we want a specific meta data flag for this?) |
| 1950 | * |
| 1951 | * If that does not make it to stable storage either, |
| 1952 | * we cannot do anything about that anymore. |
| 1953 | * |
| 1954 | * We still need to check if both bitmap and ldev are present, we may |
| 1955 | * end up here after a failed attach, before ldev was even assigned. |
| 1956 | */ |
| 1957 | if (device->bitmap && device->ldev) { |
/* An interrupted resync or similar may still recount bits
* while we detach.
* No further modifications are expected anymore, though.
| 1961 | */ |
if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
"detach", BM_LOCKED_TEST_ALLOWED, peer_device)) {
| 1964 | if (test_bit(WAS_READ_ERROR, &device->flags)) { |
| 1965 | drbd_md_set_flag(device, MDF_FULL_SYNC); |
| 1966 | drbd_md_sync(device); |
| 1967 | } |
| 1968 | } |
| 1969 | } |
| 1970 | |
| 1971 | drbd_force_state(device, NS(disk, D_DISKLESS)); |
| 1972 | } |
| 1973 | |
| 1974 | static int do_md_sync(struct drbd_device *device) |
| 1975 | { |
| 1976 | drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n" ); |
| 1977 | drbd_md_sync(device); |
| 1978 | return 0; |
| 1979 | } |
| 1980 | |
| 1981 | /* only called from drbd_worker thread, no locking */ |
| 1982 | void __update_timing_details( |
| 1983 | struct drbd_thread_timing_details *tdp, |
| 1984 | unsigned int *cb_nr, |
| 1985 | void *cb, |
| 1986 | const char *fn, const unsigned int line) |
| 1987 | { |
| 1988 | unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST; |
| 1989 | struct drbd_thread_timing_details *td = tdp + i; |
| 1990 | |
| 1991 | td->start_jif = jiffies; |
| 1992 | td->cb_addr = cb; |
| 1993 | td->caller_fn = fn; |
| 1994 | td->line = line; |
| 1995 | td->cb_nr = *cb_nr; |
| 1996 | |
| 1997 | i = (i+1) % DRBD_THREAD_DETAILS_HIST; |
| 1998 | td = tdp + i; |
| 1999 | memset(td, 0, sizeof(*td)); |
| 2000 | |
| 2001 | ++(*cb_nr); |
| 2002 | } |
| 2003 | |
| 2004 | static void do_device_work(struct drbd_device *device, const unsigned long todo) |
| 2005 | { |
| 2006 | if (test_bit(MD_SYNC, &todo)) |
| 2007 | do_md_sync(device); |
| 2008 | if (test_bit(RS_DONE, &todo) || |
| 2009 | test_bit(RS_PROGRESS, &todo)) |
update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo));
| 2011 | if (test_bit(GO_DISKLESS, &todo)) |
| 2012 | go_diskless(device); |
| 2013 | if (test_bit(DESTROY_DISK, &todo)) |
| 2014 | drbd_ldev_destroy(device); |
| 2015 | if (test_bit(RS_START, &todo)) |
| 2016 | do_start_resync(device); |
| 2017 | } |
| 2018 | |
| 2019 | #define DRBD_DEVICE_WORK_MASK \ |
| 2020 | ((1UL << GO_DISKLESS) \ |
| 2021 | |(1UL << DESTROY_DISK) \ |
| 2022 | |(1UL << MD_SYNC) \ |
| 2023 | |(1UL << RS_START) \ |
| 2024 | |(1UL << RS_PROGRESS) \ |
| 2025 | |(1UL << RS_DONE) \ |
| 2026 | ) |
| 2027 | |
| 2028 | static unsigned long get_work_bits(unsigned long *flags) |
| 2029 | { |
| 2030 | unsigned long old, new; |
| 2031 | do { |
| 2032 | old = *flags; |
| 2033 | new = old & ~DRBD_DEVICE_WORK_MASK; |
| 2034 | } while (cmpxchg(flags, old, new) != old); |
| 2035 | return old & DRBD_DEVICE_WORK_MASK; |
| 2036 | } |
| 2037 | |
| 2038 | static void do_unqueued_work(struct drbd_connection *connection) |
| 2039 | { |
| 2040 | struct drbd_peer_device *peer_device; |
| 2041 | int vnr; |
| 2042 | |
| 2043 | rcu_read_lock(); |
| 2044 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
| 2045 | struct drbd_device *device = peer_device->device; |
unsigned long todo = get_work_bits(&device->flags);
| 2047 | if (!todo) |
| 2048 | continue; |
| 2049 | |
kref_get(&device->kref);
rcu_read_unlock();
do_device_work(device, todo);
kref_put(&device->kref, drbd_destroy_device);
| 2054 | rcu_read_lock(); |
| 2055 | } |
| 2056 | rcu_read_unlock(); |
| 2057 | } |
| 2058 | |
| 2059 | static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) |
| 2060 | { |
spin_lock_irq(&queue->q_lock);
list_splice_tail_init(&queue->q, work_list);
spin_unlock_irq(&queue->q_lock);
return !list_empty(work_list);
| 2065 | } |
| 2066 | |
| 2067 | static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) |
| 2068 | { |
| 2069 | DEFINE_WAIT(wait); |
| 2070 | struct net_conf *nc; |
| 2071 | int uncork, cork; |
| 2072 | |
dequeue_work_batch(&connection->sender_work, work_list);
if (!list_empty(work_list))
| 2075 | return; |
| 2076 | |
| 2077 | /* Still nothing to do? |
| 2078 | * Maybe we still need to close the current epoch, |
| 2079 | * even if no new requests are queued yet. |
| 2080 | * |
| 2081 | * Also, poke TCP, just in case. |
| 2082 | * Then wait for new work (or signal). */ |
| 2083 | rcu_read_lock(); |
| 2084 | nc = rcu_dereference(connection->net_conf); |
| 2085 | uncork = nc ? nc->tcp_cork : 0; |
| 2086 | rcu_read_unlock(); |
| 2087 | if (uncork) { |
| 2088 | mutex_lock(&connection->data.mutex); |
| 2089 | if (connection->data.socket) |
tcp_sock_set_cork(connection->data.socket->sk, false);
mutex_unlock(&connection->data.mutex);
| 2092 | } |
| 2093 | |
| 2094 | for (;;) { |
| 2095 | int send_barrier; |
prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
spin_lock_irq(&connection->resource->req_lock);
spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
if (!list_empty(&connection->sender_work.q))
list_splice_tail_init(&connection->sender_work.q, work_list);
spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
if (!list_empty(work_list) || signal_pending(current)) {
spin_unlock_irq(&connection->resource->req_lock);
| 2104 | break; |
| 2105 | } |
| 2106 | |
/* We found nothing new to do, no to-be-communicated request,
* no other work item. We may still need to close the last
* epoch. The next incoming request's epoch will be the
* connection's current transfer log epoch number
* (connection->current_tle_nr). If that differs from the epoch
* of the last request we communicated, it is safe to send the
* epoch-separating barrier now.
*/
| 2114 | send_barrier = |
atomic_read(&connection->current_tle_nr) !=
| 2116 | connection->send.current_epoch_nr; |
spin_unlock_irq(&connection->resource->req_lock);
| 2118 | |
| 2119 | if (send_barrier) |
maybe_send_barrier(connection,
connection->send.current_epoch_nr + 1);
| 2122 | |
| 2123 | if (test_bit(DEVICE_WORK_PENDING, &connection->flags)) |
| 2124 | break; |
| 2125 | |
| 2126 | /* drbd_send() may have called flush_signals() */ |
if (get_t_state(&connection->worker) != RUNNING)
| 2128 | break; |
| 2129 | |
| 2130 | schedule(); |
| 2131 | /* may be woken up for other things but new work, too, |
| 2132 | * e.g. if the current epoch got closed. |
| 2133 | * In which case we send the barrier above. */ |
| 2134 | } |
finish_wait(&connection->sender_work.q_wait, &wait);
| 2136 | |
| 2137 | /* someone may have changed the config while we have been waiting above. */ |
| 2138 | rcu_read_lock(); |
| 2139 | nc = rcu_dereference(connection->net_conf); |
| 2140 | cork = nc ? nc->tcp_cork : 0; |
| 2141 | rcu_read_unlock(); |
| 2142 | mutex_lock(&connection->data.mutex); |
| 2143 | if (connection->data.socket) { |
| 2144 | if (cork) |
tcp_sock_set_cork(connection->data.socket->sk, true);
else if (!uncork)
tcp_sock_set_cork(connection->data.socket->sk, false);
| 2148 | } |
mutex_unlock(&connection->data.mutex);
| 2150 | } |
| 2151 | |
| 2152 | int drbd_worker(struct drbd_thread *thi) |
| 2153 | { |
| 2154 | struct drbd_connection *connection = thi->connection; |
| 2155 | struct drbd_work *w = NULL; |
| 2156 | struct drbd_peer_device *peer_device; |
| 2157 | LIST_HEAD(work_list); |
| 2158 | int vnr; |
| 2159 | |
| 2160 | while (get_t_state(thi) == RUNNING) { |
| 2161 | drbd_thread_current_set_cpu(thi); |
| 2162 | |
if (list_empty(&work_list)) {
update_worker_timing_details(connection, wait_for_work);
wait_for_work(connection, &work_list);
| 2166 | } |
| 2167 | |
if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
| 2169 | update_worker_timing_details(connection, do_unqueued_work); |
| 2170 | do_unqueued_work(connection); |
| 2171 | } |
| 2172 | |
| 2173 | if (signal_pending(current)) { |
| 2174 | flush_signals(current); |
| 2175 | if (get_t_state(thi) == RUNNING) { |
| 2176 | drbd_warn(connection, "Worker got an unexpected signal\n" ); |
| 2177 | continue; |
| 2178 | } |
| 2179 | break; |
| 2180 | } |
| 2181 | |
| 2182 | if (get_t_state(thi) != RUNNING) |
| 2183 | break; |
| 2184 | |
if (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
| 2188 | update_worker_timing_details(connection, w->cb); |
| 2189 | if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) |
| 2190 | continue; |
| 2191 | if (connection->cstate >= C_WF_REPORT_PARAMS) |
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
| 2193 | } |
| 2194 | } |
| 2195 | |
| 2196 | do { |
if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
| 2198 | update_worker_timing_details(connection, do_unqueued_work); |
| 2199 | do_unqueued_work(connection); |
| 2200 | } |
if (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, list);
list_del_init(&w->list);
update_worker_timing_details(connection, w->cb);
w->cb(w, 1);
} else
dequeue_work_batch(&connection->sender_work, &work_list);
} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
| 2209 | |
| 2210 | rcu_read_lock(); |
| 2211 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
| 2212 | struct drbd_device *device = peer_device->device; |
| 2213 | D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE); |
kref_get(&device->kref);
rcu_read_unlock();
drbd_device_cleanup(device);
kref_put(&device->kref, drbd_destroy_device);
rcu_read_lock();
| 2219 | } |
| 2220 | rcu_read_unlock(); |
| 2221 | |
| 2222 | return 0; |
| 2223 | } |
| 2224 | |