blob: 7d210548a98e3371831591a5810acc5aa8b4bafe [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/*
 * Return type of drbd_may_finish_epoch().
 * NOTE(review): the values presumably tell the caller whether the epoch
 * object is still in use, was freed, or was reused; confirm at the
 * definition of drbd_may_finish_epoch().
 */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
/* Forward declarations of file-local functions used before their definition. */
static int drbd_do_handshake(struct drbd_conf *mdev);
static int drbd_do_auth(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
68
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
/*
 * Allocation flags for the opportunistic alloc_page() attempts in
 * drbd_pp_first_pages_or_try_alloc(): highmem pages are acceptable and
 * allocation failures are not reported (__GFP_NOWARN).
 */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
71
Lars Ellenberg45bb9122010-05-14 17:10:48 +020072/*
73 * some helper functions to deal with single linked page lists,
74 * page->private being our "next" pointer.
75 */
76
77/* If at least n pages are linked at head, get n pages off.
78 * Otherwise, don't modify head, and return NULL.
79 * Locking is the responsibility of the caller.
80 */
81static struct page *page_chain_del(struct page **head, int n)
82{
83 struct page *page;
84 struct page *tmp;
85
86 BUG_ON(!n);
87 BUG_ON(!head);
88
89 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020090
91 if (!page)
92 return NULL;
93
Lars Ellenberg45bb9122010-05-14 17:10:48 +020094 while (page) {
95 tmp = page_chain_next(page);
96 if (--n == 0)
97 break; /* found sufficient pages */
98 if (tmp == NULL)
99 /* insufficient pages, don't use any of them. */
100 return NULL;
101 page = tmp;
102 }
103
104 /* add end of list marker for the returned list */
105 set_page_private(page, 0);
106 /* actual return value, and adjustment of head */
107 page = *head;
108 *head = tmp;
109 return page;
110}
111
112/* may be used outside of locks to find the tail of a (usually short)
113 * "private" page chain, before adding it back to a global chain head
114 * with page_chain_add() under a spinlock. */
115static struct page *page_chain_tail(struct page *page, int *len)
116{
117 struct page *tmp;
118 int i = 1;
119 while ((tmp = page_chain_next(page)))
120 ++i, page = tmp;
121 if (len)
122 *len = i;
123 return page;
124}
125
126static int page_chain_free(struct page *page)
127{
128 struct page *tmp;
129 int i = 0;
130 page_chain_for_each_safe(page, tmp) {
131 put_page(page);
132 ++i;
133 }
134 return i;
135}
136
137static void page_chain_add(struct page **head,
138 struct page *chain_first, struct page *chain_last)
139{
140#if 1
141 struct page *tmp;
142 tmp = page_chain_tail(chain_first, NULL);
143 BUG_ON(tmp != chain_last);
144#endif
145
146 /* add chain to head */
147 set_page_private(chain_last, (unsigned long)*head);
148 *head = chain_first;
149}
150
151static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700152{
153 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200154 struct page *tmp = NULL;
155 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700156
157 /* Yes, testing drbd_pp_vacant outside the lock is racy.
158 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200159 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700160 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200161 page = page_chain_del(&drbd_pp_pool, number);
162 if (page)
163 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700164 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 if (page)
166 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700167 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200168
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
170 * "criss-cross" setup, that might cause write-out on some other DRBD,
171 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200172 for (i = 0; i < number; i++) {
173 tmp = alloc_page(GFP_TRY);
174 if (!tmp)
175 break;
176 set_page_private(tmp, (unsigned long)page);
177 page = tmp;
178 }
179
180 if (i == number)
181 return page;
182
183 /* Not enough pages immediately available this time.
184 * No need to jump around here, drbd_pp_alloc will retry this
185 * function "soon". */
186 if (page) {
187 tmp = page_chain_tail(page, NULL);
188 spin_lock(&drbd_pp_lock);
189 page_chain_add(&drbd_pp_pool, page, tmp);
190 drbd_pp_vacant += i;
191 spin_unlock(&drbd_pp_lock);
192 }
193 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700194}
195
Philipp Reisnerb411b362009-09-25 16:07:19 -0700196static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
197{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100198 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700199 struct list_head *le, *tle;
200
201 /* The EEs are always appended to the end of the list. Since
202 they are sent in order over the wire, they have to finish
203 in order. As soon as we see the first not finished we can
204 stop to examine the list... */
205
206 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100207 peer_req = list_entry(le, struct drbd_peer_request, w.list);
208 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700209 break;
210 list_move(le, to_be_freed);
211 }
212}
213
214static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
215{
216 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100217 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700218
Philipp Reisner87eeee42011-01-19 14:16:30 +0100219 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700220 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100221 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
224 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700225}
226
227/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200228 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200230 * @number: number of pages requested
231 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700232 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200233 * Tries to allocate number pages, first from our own page pool, then from
234 * the kernel, unless this allocation would exceed the max_buffers setting.
235 * Possibly retry until DRBD frees sufficient pages somewhere else.
236 *
237 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700240{
241 struct page *page = NULL;
242 DEFINE_WAIT(wait);
243
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200244 /* Yes, we may run up to @number over max_buffers. If we
245 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100246 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200247 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700248
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200249 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
251
252 drbd_kick_lo_and_reclaim_net(mdev);
253
Philipp Reisner89e58e72011-01-19 13:12:45 +0100254 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700256 if (page)
257 break;
258 }
259
260 if (!retry)
261 break;
262
263 if (signal_pending(current)) {
264 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
265 break;
266 }
267
268 schedule();
269 }
270 finish_wait(&drbd_pp_wait, &wait);
271
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200272 if (page)
273 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700274 return page;
275}
276
277/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100278 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200279 * Either links the page chain back to the global pool,
280 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200281static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700282{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200283 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700284 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200285
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100286 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200287 i = page_chain_free(page);
288 else {
289 struct page *tmp;
290 tmp = page_chain_tail(page, &i);
291 spin_lock(&drbd_pp_lock);
292 page_chain_add(&drbd_pp_pool, page, tmp);
293 drbd_pp_vacant += i;
294 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700295 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200296 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200297 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200298 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
299 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700300 wake_up(&drbd_pp_wait);
301}
302
303/*
304You need to hold the req_lock:
305 _drbd_wait_ee_list_empty()
306
307You must not have the req_lock:
308 drbd_free_ee()
309 drbd_alloc_ee()
310 drbd_init_ee()
311 drbd_release_ee()
312 drbd_ee_fix_bhs()
313 drbd_process_done_ee()
314 drbd_clear_done_ee()
315 drbd_wait_ee_list_empty()
316*/
317
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100318struct drbd_peer_request *
319drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
320 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700321{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100322 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700323 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200324 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700325
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100326 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327 return NULL;
328
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100329 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
330 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700331 if (!(gfp_mask & __GFP_NOWARN))
332 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
333 return NULL;
334 }
335
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200336 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
337 if (!page)
338 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700339
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100340 drbd_clear_interval(&peer_req->i);
341 peer_req->i.size = data_size;
342 peer_req->i.sector = sector;
343 peer_req->i.local = false;
344 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100345
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 peer_req->epoch = NULL;
347 peer_req->mdev = mdev;
348 peer_req->pages = page;
349 atomic_set(&peer_req->pending_bios, 0);
350 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100351 /*
352 * The block_id is opaque to the receiver. It is not endianness
353 * converted, and sent back to the sender unchanged.
354 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100355 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700356
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100357 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700358
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200359 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100360 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700361 return NULL;
362}
363
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100364void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100365 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700366{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100367 if (peer_req->flags & EE_HAS_DIGEST)
368 kfree(peer_req->digest);
369 drbd_pp_free(mdev, peer_req->pages, is_net);
370 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
371 D_ASSERT(drbd_interval_empty(&peer_req->i));
372 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700373}
374
375int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
376{
377 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100378 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200380 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700381
Philipp Reisner87eeee42011-01-19 14:16:30 +0100382 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100384 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100386 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
387 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700388 count++;
389 }
390 return count;
391}
392
393
394/*
395 * This function is called from _asender only_
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100396 * but see also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700397 * and receive_Barrier.
398 *
399 * Move entries from net_ee to done_ee, if ready.
400 * Grab done_ee, call all callbacks, free the entries.
401 * The callbacks typically send out ACKs.
402 */
403static int drbd_process_done_ee(struct drbd_conf *mdev)
404{
405 LIST_HEAD(work_list);
406 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100407 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408 int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
409
Philipp Reisner87eeee42011-01-19 14:16:30 +0100410 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700411 reclaim_net_ee(mdev, &reclaimed);
412 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100413 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700414
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100415 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
416 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700417
418 /* possible callbacks here:
419 * e_end_block, and e_end_resync_block, e_send_discard_ack.
420 * all ignore the last argument.
421 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100422 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700423 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100424 ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok;
425 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700426 }
427 wake_up(&mdev->ee_wait);
428
429 return ok;
430}
431
432void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
433{
434 DEFINE_WAIT(wait);
435
436 /* avoids spin_lock/unlock
437 * and calling prepare_to_wait in the fast path */
438 while (!list_empty(head)) {
439 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100440 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100441 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700442 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100443 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700444 }
445}
446
447void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
448{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100449 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100451 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452}
453
454/* see also kernel_accept; which is only present since 2.6.18.
455 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100456static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700457{
458 struct sock *sk = sock->sk;
459 int err = 0;
460
461 *what = "listen";
462 err = sock->ops->listen(sock, 5);
463 if (err < 0)
464 goto out;
465
466 *what = "sock_create_lite";
467 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
468 newsock);
469 if (err < 0)
470 goto out;
471
472 *what = "accept";
473 err = sock->ops->accept(sock, *newsock, 0);
474 if (err < 0) {
475 sock_release(*newsock);
476 *newsock = NULL;
477 goto out;
478 }
479 (*newsock)->ops = sock->ops;
480
481out:
482 return err;
483}
484
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100485static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700486{
487 mm_segment_t oldfs;
488 struct kvec iov = {
489 .iov_base = buf,
490 .iov_len = size,
491 };
492 struct msghdr msg = {
493 .msg_iovlen = 1,
494 .msg_iov = (struct iovec *)&iov,
495 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
496 };
497 int rv;
498
499 oldfs = get_fs();
500 set_fs(KERNEL_DS);
501 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
502 set_fs(oldfs);
503
504 return rv;
505}
506
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100507static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700508{
509 mm_segment_t oldfs;
510 struct kvec iov = {
511 .iov_base = buf,
512 .iov_len = size,
513 };
514 struct msghdr msg = {
515 .msg_iovlen = 1,
516 .msg_iov = (struct iovec *)&iov,
517 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
518 };
519 int rv;
520
521 oldfs = get_fs();
522 set_fs(KERNEL_DS);
523
524 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100525 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700526 if (rv == size)
527 break;
528
529 /* Note:
530 * ECONNRESET other side closed the connection
531 * ERESTARTSYS (on sock) we got a signal
532 */
533
534 if (rv < 0) {
535 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100536 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700537 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100538 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700539 break;
540 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100541 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700542 break;
543 } else {
544 /* signal came in, or peer/link went down,
545 * after we read a partial message
546 */
547 /* D_ASSERT(signal_pending(current)); */
548 break;
549 }
550 };
551
552 set_fs(oldfs);
553
554 if (rv != size)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100555 drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700556
557 return rv;
558}
559
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200560/* quoting tcp(7):
561 * On individual connections, the socket buffer size must be set prior to the
562 * listen(2) or connect(2) calls in order to have it take effect.
563 * This is our wrapper to do so.
564 */
565static void drbd_setbufsize(struct socket *sock, unsigned int snd,
566 unsigned int rcv)
567{
568 /* open coded SO_SNDBUF, SO_RCVBUF */
569 if (snd) {
570 sock->sk->sk_sndbuf = snd;
571 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
572 }
573 if (rcv) {
574 sock->sk->sk_rcvbuf = rcv;
575 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
576 }
577}
578
/*
 * Make one attempt to actively connect to the peer.
 *
 * Creates a TCP socket, binds it to the configured local address (with
 * port 0) and connects to the configured peer address.  Holds a
 * net_conf reference for the duration of the call.
 *
 * Returns the connected socket, or NULL.  Failures of socket creation
 * or bind that are not in the "temporary" list below force the
 * connection to C_DISCONNECTING; connect failures never do.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	struct sockaddr_in6 src_in6;
	struct socket *sock;
	const char *what;
	int disconnect_on_error = 1;
	int family;
	int err;

	if (!get_net_conf(tconn))
		return NULL;

	family = ((struct sockaddr *)tconn->net_conf->my_addr)->sa_family;

	what = "sock_create_kern";
	err = sock_create_kern(family, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(tconn);
	return sock;
}
656
Philipp Reisner76536202011-02-07 14:09:54 +0100657static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700658{
659 int timeo, err;
660 struct socket *s_estab = NULL, *s_listen;
661 const char *what;
662
Philipp Reisner76536202011-02-07 14:09:54 +0100663 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700664 return NULL;
665
666 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100667 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700668 SOCK_STREAM, IPPROTO_TCP, &s_listen);
669 if (err) {
670 s_listen = NULL;
671 goto out;
672 }
673
Philipp Reisner76536202011-02-07 14:09:54 +0100674 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700675 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
676
677 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
678 s_listen->sk->sk_rcvtimeo = timeo;
679 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100680 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
681 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700682
683 what = "bind before listen";
684 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100685 (struct sockaddr *) tconn->net_conf->my_addr,
686 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700687 if (err < 0)
688 goto out;
689
Philipp Reisner76536202011-02-07 14:09:54 +0100690 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700691
692out:
693 if (s_listen)
694 sock_release(s_listen);
695 if (err < 0) {
696 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100697 conn_err(tconn, "%s failed, err = %d\n", what, err);
698 drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700699 }
700 }
Philipp Reisner76536202011-02-07 14:09:54 +0100701 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700702
703 return s_estab;
704}
705
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100706static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700707{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100708 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700709
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100710 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700711}
712
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100713static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700714{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100715 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700716 int rr;
717
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100718 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700719
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100720 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700721 return be16_to_cpu(h->command);
722
723 return 0xffff;
724}
725
726/**
727 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700728 * @sock: pointer to the pointer to the socket.
729 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100730static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700731{
732 int rr;
733 char tb[4];
734
735 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100736 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700737
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100738 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700739
740 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100741 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742 } else {
743 sock_release(*sock);
744 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100745 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700746 }
747}
748
749/*
750 * return values:
751 * 1 yes, we have a valid connection
752 * 0 oops, did not work out, please try again
753 * -1 peer talks different language,
754 * no point in trying again, please go standalone.
755 * -2 We do not have a network config...
756 */
757static int drbd_connect(struct drbd_conf *mdev)
758{
759 struct socket *s, *sock, *msock;
760 int try, h, ok;
761
Philipp Reisnere42325a2011-01-19 13:55:45 +0100762 D_ASSERT(!mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700763
Philipp Reisnerb411b362009-09-25 16:07:19 -0700764 if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
765 return -2;
766
Philipp Reisner25703f82011-02-07 14:35:25 +0100767 clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100768 mdev->tconn->agreed_pro_version = 99;
769 /* agreed_pro_version must be smaller than 100 so we send the old
770 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771
772 sock = NULL;
773 msock = NULL;
774
775 do {
776 for (try = 0;;) {
777 /* 3 tries, this should take less than a second! */
Philipp Reisnereac3e992011-02-07 14:05:07 +0100778 s = drbd_try_connect(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700779 if (s || ++try >= 3)
780 break;
781 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100782 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700783 }
784
785 if (s) {
786 if (!sock) {
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100787 drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700788 sock = s;
789 s = NULL;
790 } else if (!msock) {
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100791 drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700792 msock = s;
793 s = NULL;
794 } else {
795 dev_err(DEV, "Logic error in drbd_connect()\n");
796 goto out_release_sockets;
797 }
798 }
799
800 if (sock && msock) {
Philipp Reisner89e58e72011-01-19 13:12:45 +0100801 schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100802 ok = drbd_socket_okay(&sock);
803 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700804 if (ok)
805 break;
806 }
807
808retry:
Philipp Reisner76536202011-02-07 14:09:54 +0100809 s = drbd_wait_for_connect(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700810 if (s) {
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100811 try = drbd_recv_fp(mdev->tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100812 drbd_socket_okay(&sock);
813 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814 switch (try) {
815 case P_HAND_SHAKE_S:
816 if (sock) {
817 dev_warn(DEV, "initial packet S crossed\n");
818 sock_release(sock);
819 }
820 sock = s;
821 break;
822 case P_HAND_SHAKE_M:
823 if (msock) {
824 dev_warn(DEV, "initial packet M crossed\n");
825 sock_release(msock);
826 }
827 msock = s;
Philipp Reisner25703f82011-02-07 14:35:25 +0100828 set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700829 break;
830 default:
831 dev_warn(DEV, "Error receiving initial packet\n");
832 sock_release(s);
833 if (random32() & 1)
834 goto retry;
835 }
836 }
837
838 if (mdev->state.conn <= C_DISCONNECTING)
839 goto out_release_sockets;
840 if (signal_pending(current)) {
841 flush_signals(current);
842 smp_rmb();
Philipp Reisnere6b3ea82011-01-19 14:02:01 +0100843 if (get_t_state(&mdev->tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700844 goto out_release_sockets;
845 }
846
847 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100848 ok = drbd_socket_okay(&sock);
849 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700850 if (ok)
851 break;
852 }
853 } while (1);
854
855 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
856 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
857
858 sock->sk->sk_allocation = GFP_NOIO;
859 msock->sk->sk_allocation = GFP_NOIO;
860
861 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
862 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
863
Philipp Reisnerb411b362009-09-25 16:07:19 -0700864 /* NOT YET ...
Philipp Reisner89e58e72011-01-19 13:12:45 +0100865 * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
867 * first set it to the P_HAND_SHAKE timeout,
868 * which we set to 4x the configured ping_timeout. */
869 sock->sk->sk_sndtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +0100870 sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700871
Philipp Reisner89e58e72011-01-19 13:12:45 +0100872 msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
873 msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700874
875 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300876 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700877 drbd_tcp_nodelay(sock);
878 drbd_tcp_nodelay(msock);
879
Philipp Reisnere42325a2011-01-19 13:55:45 +0100880 mdev->tconn->data.socket = sock;
881 mdev->tconn->meta.socket = msock;
Philipp Reisner31890f42011-01-19 14:12:51 +0100882 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700883
Philipp Reisnere6b3ea82011-01-19 14:02:01 +0100884 D_ASSERT(mdev->tconn->asender.task == NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700885
886 h = drbd_do_handshake(mdev);
887 if (h <= 0)
888 return h;
889
Philipp Reisnera0638452011-01-19 14:31:32 +0100890 if (mdev->tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700891 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Johannes Thomab10d96c2010-01-07 16:02:50 +0100892 switch (drbd_do_auth(mdev)) {
893 case -1:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894 dev_err(DEV, "Authentication of peer failed\n");
895 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100896 case 0:
897 dev_err(DEV, "Authentication of peer failed, trying again.\n");
898 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700899 }
900 }
901
902 if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
903 return 0;
904
Philipp Reisner89e58e72011-01-19 13:12:45 +0100905 sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700906 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
907
908 atomic_set(&mdev->packet_seq, 0);
909 mdev->peer_seq = 0;
910
Philipp Reisnere6b3ea82011-01-19 14:02:01 +0100911 drbd_thread_start(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700912
Philipp Reisner148efa12011-01-15 00:21:15 +0100913 if (drbd_send_protocol(mdev) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200914 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700915 drbd_send_sync_param(mdev, &mdev->sync_conf);
Philipp Reisnere89b5912010-03-24 17:11:33 +0100916 drbd_send_sizes(mdev, 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 drbd_send_uuids(mdev);
918 drbd_send_state(mdev);
919 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
920 clear_bit(RESIZE_PENDING, &mdev->flags);
Philipp Reisner7fde2be2011-03-01 11:08:28 +0100921 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922
923 return 1;
924
925out_release_sockets:
926 if (sock)
927 sock_release(sock);
928 if (msock)
929 sock_release(msock);
930 return -1;
931}
932
Philipp Reisnerce243852011-02-07 17:27:47 +0100933static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100935 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100936 pi->cmd = be16_to_cpu(h->h80.command);
937 pi->size = be16_to_cpu(h->h80.length);
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100938 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100939 pi->cmd = be16_to_cpu(h->h95.command);
940 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
941 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200942 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100943 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200944 be32_to_cpu(h->h80.magic),
945 be16_to_cpu(h->h80.command),
946 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100947 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700948 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100949 return true;
950}
951
Philipp Reisner77351055b2011-02-07 17:24:26 +0100952static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100953{
954 struct p_header *h = &mdev->tconn->data.rbuf.header;
955 int r;
956
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100957 r = drbd_recv(mdev->tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100958 if (unlikely(r != sizeof(*h))) {
959 if (!signal_pending(current))
960 dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
961 return false;
962 }
963
Philipp Reisnerce243852011-02-07 17:27:47 +0100964 r = decode_header(mdev->tconn, h, pi);
Philipp Reisner31890f42011-01-19 14:12:51 +0100965 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700966
Philipp Reisner257d0af2011-01-26 12:15:29 +0100967 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700968}
969
Philipp Reisner2451fc32010-08-24 13:43:11 +0200970static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700971{
972 int rv;
973
974 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400975 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200976 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977 if (rv) {
978 dev_err(DEV, "local disk flush failed with status %d\n", rv);
979 /* would rather check on EOPNOTSUPP, but that is not reliable.
980 * don't try again for ANY return value != 0
981 * if (rv == -EOPNOTSUPP) */
982 drbd_bump_write_ordering(mdev, WO_drain_io);
983 }
984 put_ldev(mdev);
985 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986}
987
988/**
989 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
990 * @mdev: DRBD device.
991 * @epoch: Epoch object.
992 * @ev: Epoch event.
993 */
994static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
995 struct drbd_epoch *epoch,
996 enum epoch_event ev)
997{
Philipp Reisner2451fc32010-08-24 13:43:11 +0200998 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700999 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001000 enum finish_epoch rv = FE_STILL_LIVE;
1001
1002 spin_lock(&mdev->epoch_lock);
1003 do {
1004 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005
1006 epoch_size = atomic_read(&epoch->epoch_size);
1007
1008 switch (ev & ~EV_CLEANUP) {
1009 case EV_PUT:
1010 atomic_dec(&epoch->active);
1011 break;
1012 case EV_GOT_BARRIER_NR:
1013 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014 break;
1015 case EV_BECAME_LAST:
1016 /* nothing to do*/
1017 break;
1018 }
1019
Philipp Reisnerb411b362009-09-25 16:07:19 -07001020 if (epoch_size != 0 &&
1021 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001022 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023 if (!(ev & EV_CLEANUP)) {
1024 spin_unlock(&mdev->epoch_lock);
1025 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1026 spin_lock(&mdev->epoch_lock);
1027 }
1028 dec_unacked(mdev);
1029
1030 if (mdev->current_epoch != epoch) {
1031 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1032 list_del(&epoch->list);
1033 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1034 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001035 kfree(epoch);
1036
1037 if (rv == FE_STILL_LIVE)
1038 rv = FE_DESTROYED;
1039 } else {
1040 epoch->flags = 0;
1041 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001042 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001043 if (rv == FE_STILL_LIVE)
1044 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001045 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001046 }
1047 }
1048
1049 if (!next_epoch)
1050 break;
1051
1052 epoch = next_epoch;
1053 } while (1);
1054
1055 spin_unlock(&mdev->epoch_lock);
1056
Philipp Reisnerb411b362009-09-25 16:07:19 -07001057 return rv;
1058}
1059
1060/**
1061 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1062 * @mdev: DRBD device.
1063 * @wo: Write ordering method to try.
1064 */
1065void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1066{
1067 enum write_ordering_e pwo;
1068 static char *write_ordering_str[] = {
1069 [WO_none] = "none",
1070 [WO_drain_io] = "drain",
1071 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072 };
1073
1074 pwo = mdev->write_ordering;
1075 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1077 wo = WO_drain_io;
1078 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1079 wo = WO_none;
1080 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001081 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1083}
1084
1085/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001086 * drbd_submit_ee()
1087 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001088 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001089 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001090 *
1091 * May spread the pages to multiple bios,
1092 * depending on bio_add_page restrictions.
1093 *
1094 * Returns 0 if all bios have been submitted,
1095 * -ENOMEM if we could not allocate enough bios,
1096 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1097 * single page to an empty bio (which should never happen and likely indicates
1098 * that the lower level IO stack is in some way broken). This has been observed
1099 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001100 */
1101/* TODO allocate from our own bio_set. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001102int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001103 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001104{
1105 struct bio *bios = NULL;
1106 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001107 struct page *page = peer_req->pages;
1108 sector_t sector = peer_req->i.sector;
1109 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001110 unsigned n_bios = 0;
1111 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001112 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001113
1114 /* In most cases, we will only need one bio. But in case the lower
1115 * level restrictions happen to be different at this offset on this
1116 * side than those of the sending peer, we may need to submit the
1117 * request in more than one bio. */
1118next_bio:
1119 bio = bio_alloc(GFP_NOIO, nr_pages);
1120 if (!bio) {
1121 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1122 goto fail;
1123 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001124 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001125 bio->bi_sector = sector;
1126 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001127 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001128 bio->bi_private = peer_req;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001129 bio->bi_end_io = drbd_endio_sec;
1130
1131 bio->bi_next = bios;
1132 bios = bio;
1133 ++n_bios;
1134
1135 page_chain_for_each(page) {
1136 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1137 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001138 /* A single page must always be possible!
1139 * But in case it fails anyways,
1140 * we deal with it, and complain (below). */
1141 if (bio->bi_vcnt == 0) {
1142 dev_err(DEV,
1143 "bio_add_page failed for len=%u, "
1144 "bi_vcnt=0 (bi_sector=%llu)\n",
1145 len, (unsigned long long)bio->bi_sector);
1146 err = -ENOSPC;
1147 goto fail;
1148 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001149 goto next_bio;
1150 }
1151 ds -= len;
1152 sector += len >> 9;
1153 --nr_pages;
1154 }
1155 D_ASSERT(page == NULL);
1156 D_ASSERT(ds == 0);
1157
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001158 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001159 do {
1160 bio = bios;
1161 bios = bios->bi_next;
1162 bio->bi_next = NULL;
1163
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001164 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001165 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001166 return 0;
1167
1168fail:
1169 while (bios) {
1170 bio = bios;
1171 bios = bios->bi_next;
1172 bio_put(bio);
1173 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001174 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001175}
1176
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001177static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001178 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001179{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001180 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001181
1182 drbd_remove_interval(&mdev->write_requests, i);
1183 drbd_clear_interval(i);
1184
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001185 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001186 if (i->waiting)
1187 wake_up(&mdev->misc_wait);
1188}
1189
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001190static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1191 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001192{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001193 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001194 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 struct drbd_epoch *epoch;
1196
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 inc_unacked(mdev);
1198
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 mdev->current_epoch->barrier_nr = p->barrier;
1200 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1201
1202 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1203 * the activity log, which means it would not be resynced in case the
1204 * R_PRIMARY crashes now.
1205 * Therefore we must send the barrier_ack after the barrier request was
1206 * completed. */
1207 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 case WO_none:
1209 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001210 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001211
1212 /* receiver context, in the writeout path of the other node.
1213 * avoid potential distributed deadlock */
1214 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1215 if (epoch)
1216 break;
1217 else
1218 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1219 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220
1221 case WO_bdev_flush:
1222 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001224 drbd_flush(mdev);
1225
1226 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1227 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1228 if (epoch)
1229 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230 }
1231
Philipp Reisner2451fc32010-08-24 13:43:11 +02001232 epoch = mdev->current_epoch;
1233 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1234
1235 D_ASSERT(atomic_read(&epoch->active) == 0);
1236 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001237
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001238 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001239 default:
1240 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001241 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 }
1243
1244 epoch->flags = 0;
1245 atomic_set(&epoch->epoch_size, 0);
1246 atomic_set(&epoch->active, 0);
1247
1248 spin_lock(&mdev->epoch_lock);
1249 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1250 list_add(&epoch->list, &mdev->current_epoch->list);
1251 mdev->current_epoch = epoch;
1252 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253 } else {
1254 /* The current_epoch got recycled while we allocated this one... */
1255 kfree(epoch);
1256 }
1257 spin_unlock(&mdev->epoch_lock);
1258
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001259 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260}
1261
1262/* used from receive_RSDataReply (recv_resync_read)
1263 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001264static struct drbd_peer_request *
1265read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1266 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267{
Lars Ellenberg66660322010-04-06 12:15:04 +02001268 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001269 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001271 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001272 void *dig_in = mdev->tconn->int_dig_in;
1273 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001274 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275
Philipp Reisnera0638452011-01-19 14:31:32 +01001276 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1277 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278
1279 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001280 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001281 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001282 if (!signal_pending(current))
1283 dev_warn(DEV,
1284 "short read receiving data digest: read %d expected %d\n",
1285 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001286 return NULL;
1287 }
1288 }
1289
1290 data_size -= dgs;
1291
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001292 if (!expect(data_size != 0))
1293 return NULL;
1294 if (!expect(IS_ALIGNED(data_size, 512)))
1295 return NULL;
1296 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1297 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298
Lars Ellenberg66660322010-04-06 12:15:04 +02001299 /* even though we trust out peer,
1300 * we sometimes have to double check. */
1301 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001302 dev_err(DEV, "request from peer beyond end of local disk: "
1303 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001304 (unsigned long long)capacity,
1305 (unsigned long long)sector, data_size);
1306 return NULL;
1307 }
1308
Philipp Reisnerb411b362009-09-25 16:07:19 -07001309 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1310 * "criss-cross" setup, that might cause write-out on some other DRBD,
1311 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001312 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1313 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001314 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001315
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001317 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001318 page_chain_for_each(page) {
1319 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001320 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001321 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001322 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001323 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1324 data[0] = data[0] ^ (unsigned long)-1;
1325 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001326 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001328 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001329 if (!signal_pending(current))
1330 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1331 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001332 return NULL;
1333 }
1334 ds -= rr;
1335 }
1336
1337 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001338 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001339 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001340 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1341 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001342 drbd_bcast_ee(mdev, "digest failed",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001343 dgs, dig_in, dig_vv, peer_req);
1344 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001345 return NULL;
1346 }
1347 }
1348 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001349 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001350}
1351
1352/* drbd_drain_block() just takes a data block
1353 * out of the socket input buffer, and discards it.
1354 */
1355static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1356{
1357 struct page *page;
1358 int rr, rv = 1;
1359 void *data;
1360
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001361 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001362 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001363
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001364 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001365
1366 data = kmap(page);
1367 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001368 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1370 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001371 if (!signal_pending(current))
1372 dev_warn(DEV,
1373 "short read receiving data: read %d expected %d\n",
1374 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375 break;
1376 }
1377 data_size -= rr;
1378 }
1379 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001380 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381 return rv;
1382}
1383
1384static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1385 sector_t sector, int data_size)
1386{
1387 struct bio_vec *bvec;
1388 struct bio *bio;
1389 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001390 void *dig_in = mdev->tconn->int_dig_in;
1391 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
Philipp Reisnera0638452011-01-19 14:31:32 +01001393 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1394 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395
1396 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001397 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001398 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001399 if (!signal_pending(current))
1400 dev_warn(DEV,
1401 "short read receiving data reply digest: read %d expected %d\n",
1402 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403 return 0;
1404 }
1405 }
1406
1407 data_size -= dgs;
1408
1409 /* optimistically update recv_cnt. if receiving fails below,
1410 * we disconnect anyways, and counters will be reset. */
1411 mdev->recv_cnt += data_size>>9;
1412
1413 bio = req->master_bio;
1414 D_ASSERT(sector == bio->bi_sector);
1415
1416 bio_for_each_segment(bvec, bio, i) {
1417 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001418 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001419 kmap(bvec->bv_page)+bvec->bv_offset,
1420 expect);
1421 kunmap(bvec->bv_page);
1422 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001423 if (!signal_pending(current))
1424 dev_warn(DEV, "short read receiving data reply: "
1425 "read %d expected %d\n",
1426 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001427 return 0;
1428 }
1429 data_size -= rr;
1430 }
1431
1432 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001433 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001434 if (memcmp(dig_in, dig_vv, dgs)) {
1435 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1436 return 0;
1437 }
1438 }
1439
1440 D_ASSERT(data_size == 0);
1441 return 1;
1442}
1443
1444/* e_end_resync_block() is called via
1445 * drbd_process_done_ee() by asender only */
1446static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1447{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001448 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1449 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001450 int ok;
1451
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001452 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001453
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001454 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1455 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1456 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457 } else {
1458 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001459 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001461 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 }
1463 dec_unacked(mdev);
1464
1465 return ok;
1466}
1467
1468static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1469{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001470 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001471
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001472 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1473 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001474 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
1476 dec_rs_pending(mdev);
1477
Philipp Reisnerb411b362009-09-25 16:07:19 -07001478 inc_unacked(mdev);
1479 /* corresponding dec_unacked() in e_end_resync_block()
1480 * respective _drbd_clear_done_ee */
1481
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001482 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001483
Philipp Reisner87eeee42011-01-19 14:16:30 +01001484 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001485 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001486 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001488 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001489 if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001490 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001492 /* don't care for the reason here */
1493 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001494 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001495 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001496 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001497
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001498 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001499fail:
1500 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001501 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502}
1503
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001504static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001505find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1506 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001507{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001508 struct drbd_request *req;
1509
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001510 /* Request object according to our peer */
1511 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001512 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001513 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001514 if (!missing_ok) {
1515 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1516 (unsigned long)id, (unsigned long long)sector);
1517 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001518 return NULL;
1519}
1520
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001521static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1522 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523{
1524 struct drbd_request *req;
1525 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001527 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528
1529 sector = be64_to_cpu(p->sector);
1530
Philipp Reisner87eeee42011-01-19 14:16:30 +01001531 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001532 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001533 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001534 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001535 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536
Bart Van Assche24c48302011-05-21 18:32:29 +02001537 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538 * special casing it there for the various failure cases.
1539 * still no race with drbd_fail_pending_reads */
1540 ok = recv_dless_read(mdev, req, sector, data_size);
1541
1542 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001543 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001544 /* else: nothing. handled from drbd_disconnect...
1545 * I don't think we may complete this just yet
1546 * in case we are "on-disconnect: freeze" */
1547
1548 return ok;
1549}
1550
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001551static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1552 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553{
1554 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001556 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557
1558 sector = be64_to_cpu(p->sector);
1559 D_ASSERT(p->block_id == ID_SYNCER);
1560
1561 if (get_ldev(mdev)) {
1562 /* data is submitted to disk within recv_resync_read.
1563 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001564 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 ok = recv_resync_read(mdev, sector, data_size);
1566 } else {
1567 if (__ratelimit(&drbd_ratelimit_state))
1568 dev_err(DEV, "Can not write resync data to local disk.\n");
1569
1570 ok = drbd_drain_block(mdev, data_size);
1571
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001572 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573 }
1574
Philipp Reisner778f2712010-07-06 11:14:00 +02001575 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1576
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 return ok;
1578}
1579
1580/* e_end_block() is called via drbd_process_done_ee().
1581 * this means this function only runs in the asender thread
1582 */
1583static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1584{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001585 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1586 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587 int ok = 1, pcmd;
1588
Philipp Reisner89e58e72011-01-19 13:12:45 +01001589 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001590 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1592 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001593 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001595 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001597 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001599 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001600 /* we expect it to be marked out of sync anyways...
1601 * maybe assert this? */
1602 }
1603 dec_unacked(mdev);
1604 }
1605 /* we delete from the conflict detection hash _after_ we sent out the
1606 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001607 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001608 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001609 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1610 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001611 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001612 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001613 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001615 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001616
1617 return ok;
1618}
1619
1620static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1621{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001622 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001623 int ok = 1;
1624
Philipp Reisner89e58e72011-01-19 13:12:45 +01001625 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001626 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627
Philipp Reisner87eeee42011-01-19 14:16:30 +01001628 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001629 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1630 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001631 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632
1633 dec_unacked(mdev);
1634
1635 return ok;
1636}
1637
/*
 * seq_greater() - wrap-around aware "a is newer than b" for sequence numbers.
 *
 * We assume 32-bit wrap-around here.
 * For 24-bit wrap-around, we would have to shift:
 *  a <<= 8; b <<= 8;
 *
 * The difference is formed in unsigned arithmetic, where wrap-around is
 * well defined, and only the result is reinterpreted as signed.
 * Subtracting two values that were each cast to s32 first can overflow,
 * which is undefined behaviour for signed integers.
 *
 * Returns true if @a is ahead of @b by 1 .. 2^31 - 1, false otherwise.
 */
static bool seq_greater(u32 a, u32 b)
{
	return (s32)(a - b) > 0;
}
1647
/* seq_max() - the newer of two sequence numbers, as ordered by seq_greater(). */
static u32 seq_max(u32 a, u32 b)
{
	if (seq_greater(a, b))
		return a;

	return b;
}
1652
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001653static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001654{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001655 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001656
1657 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001658 old_peer_seq = mdev->peer_seq;
1659 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001660 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001661 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001662 wake_up(&mdev->seq_wait);
1663}
1664
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665/* Called from receive_Data.
1666 * Synchronize packets on sock with packets on msock.
1667 *
1668 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1669 * packet traveling on msock, they are still processed in the order they have
1670 * been sent.
1671 *
1672 * Note: we don't care for Ack packets overtaking P_DATA packets.
1673 *
1674 * In case packet_seq is larger than mdev->peer_seq number, there are
1675 * outstanding packets on the msock. We wait for them to arrive.
1676 * In case we are the logically next packet, we update mdev->peer_seq
1677 * ourselves. Correctly handles 32bit wrap around.
1678 *
1679 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1680 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1681 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1682 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1683 *
1684 * returns 0 if we may process the packet,
1685 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1686static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1687{
1688 DEFINE_WAIT(wait);
1689 unsigned int p_seq;
1690 long timeout;
1691 int ret = 0;
1692 spin_lock(&mdev->peer_seq_lock);
1693 for (;;) {
1694 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001695 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001696 break;
1697 if (signal_pending(current)) {
1698 ret = -ERESTARTSYS;
1699 break;
1700 }
1701 p_seq = mdev->peer_seq;
1702 spin_unlock(&mdev->peer_seq_lock);
1703 timeout = schedule_timeout(30*HZ);
1704 spin_lock(&mdev->peer_seq_lock);
1705 if (timeout == 0 && p_seq == mdev->peer_seq) {
1706 ret = -ETIMEDOUT;
1707 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1708 break;
1709 }
1710 }
1711 finish_wait(&mdev->seq_wait, &wait);
1712 if (mdev->peer_seq+1 == packet_seq)
1713 mdev->peer_seq++;
1714 spin_unlock(&mdev->peer_seq_lock);
1715 return ret;
1716}
1717
Lars Ellenberg688593c2010-11-17 22:25:03 +01001718/* see also bio_flags_to_wire()
1719 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1720 * flags and back. We may replicate to other kernel versions. */
1721static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001722{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001723 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1724 (dpf & DP_FUA ? REQ_FUA : 0) |
1725 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1726 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001727}
1728
/*
 * receive_Data() - handle a mirrored write sent by the peer.
 *
 * Reads the payload into a peer request via read_in_block(), attaches the
 * request to the current epoch, puts it on mdev->active_ee and submits it
 * as a write with e_end_block() as completion callback.
 *
 * If get_ldev() fails, the write is answered with P_NEG_ACK and the payload
 * is drained from the socket instead.
 *
 * If two_primaries is set, this first waits for the peer sequence number
 * and then for overlapping entries in mdev->write_requests; details are in
 * the conflict handling comment in the body.
 *
 * @cmd is not used in this function.
 *
 * Returns true if the request was submitted, or queued on done_ee for a
 * P_DISCARD_ACK; false on failure.  In the get_ldev() failure case the
 * result of drbd_drain_block() is returned.
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001730static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1731 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001732{
1733 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001734 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001735 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736 int rw = WRITE;
1737 u32 dp_flags;
1738
/* No local disk reference: advance peer_seq if this is the next packet in
 * sequence, send P_NEG_ACK, count the packet in the current epoch and
 * drain the payload. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001739 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 spin_lock(&mdev->peer_seq_lock);
1741 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1742 mdev->peer_seq++;
1743 spin_unlock(&mdev->peer_seq_lock);
1744
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001745 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746 atomic_inc(&mdev->current_epoch->epoch_size);
1747 return drbd_drain_block(mdev, data_size);
1748 }
1749
1750 /* get_ldev(mdev) successful.
1751 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001752 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753 * the end of this function. */
1754
1755 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001756 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1757 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001759 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001760 }
1761
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001762 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763
/* Translate the DP_* flags from the packet into bio REQ_* flags. */
Lars Ellenberg688593c2010-11-17 22:25:03 +01001764 dp_flags = be32_to_cpu(p->dp_flags);
1765 rw |= wire_flags_to_bio(mdev, dp_flags);
1766
1767 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001768 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001769
/* Account the request in the current epoch.  e_end_block() and the
 * out_interrupted path below call drbd_may_finish_epoch() with EV_PUT. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001770 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001771 peer_req->epoch = mdev->current_epoch;
1772 atomic_inc(&peer_req->epoch->epoch_size);
1773 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774 spin_unlock(&mdev->epoch_lock);
1775
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001777 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001778 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779 } else {
1780 /* don't get the req_lock yet,
1781 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001782 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001783 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785 int first;
1786
Philipp Reisner89e58e72011-01-19 13:12:45 +01001787 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788
1789 /* conflict detection and handling:
1790 * 1. wait on the sequence number,
1791 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001792 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793 *
1794 * Note: for two_primaries, we are protocol C,
1795 * so there cannot be any request that is DONE
1796 * but still on the transfer log.
1797 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001798 * if no conflicting request is found:
1799 * submit.
1800 *
1801 * if any conflicting request is found
1802 * that has not yet been acked,
1803 * AND I have the "discard concurrent writes" flag:
1804 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1805 *
1806 * if any conflicting request is found:
1807 * block the receiver, waiting on misc_wait
1808 * until no more conflicting requests are there,
1809 * or we get interrupted (disconnect).
1810 *
1811 * we do not just write after local io completion of those
1812 * requests, but only after req is done completely, i.e.
1813 * we wait for the P_DISCARD_ACK to arrive!
1814 *
1815 * then proceed normally, i.e. submit.
1816 */
1817 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1818 goto out_interrupted;
1819
Philipp Reisner87eeee42011-01-19 14:16:30 +01001820 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001821
Philipp Reisnerb411b362009-09-25 16:07:19 -07001822 first = 1;
/* Each pass runs with req_lock held; the lock is dropped only around
 * schedule() and on the two exits that leave the function. */
1823 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001824 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825 int have_unacked = 0;
1826 int have_conflict = 0;
1827 prepare_to_wait(&mdev->misc_wait, &wait,
1828 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001829
1830 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1831 if (i) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001832 /* only ALERT on first iteration,
1833 * we may be woken up early... */
1834 if (first)
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001835 dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001836 " new: %llus +%u; pending: %llus +%u\n",
1837 current->comm, current->pid,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001838 i->local ? "local" : "remote",
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001839 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001840 (unsigned long long)i->sector, i->size);
1841
/* A local interval is embedded in a struct drbd_request; it counts as
 * unacked while RQ_NET_PENDING is set. */
1842 if (i->local) {
1843 struct drbd_request *req2;
1844
1845 req2 = container_of(i, struct drbd_request, i);
1846 if (req2->rq_state & RQ_NET_PENDING)
1847 ++have_unacked;
1848 }
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001849 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850 }
/* No overlap: leave the loop, still holding req_lock. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 if (!have_conflict)
1852 break;
1853
1854 /* Discard Ack only for the _first_ iteration */
1855 if (first && discard && have_unacked) {
1856 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1857 (unsigned long long)sector);
1858 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001859 peer_req->w.cb = e_send_discard_ack;
1860 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001861
Philipp Reisner87eeee42011-01-19 14:16:30 +01001862 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863
1864 /* we could probably send that P_DISCARD_ACK ourselves,
1865 * but I don't like the receiver using the msock */
1866
1867 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001868 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001869 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001870 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871 }
1872
1873 if (signal_pending(current)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001874 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875 finish_wait(&mdev->misc_wait, &wait);
1876 goto out_interrupted;
1877 }
1878
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001879 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001880 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001881
Philipp Reisner87eeee42011-01-19 14:16:30 +01001882 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001883 if (first) {
1884 first = 0;
1885 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1886 "sec=%llus\n", (unsigned long long)sector);
1887 } else if (discard) {
1888 /* we had none on the first iteration.
1889 * there must be none now. */
1890 D_ASSERT(have_unacked == 0);
1891 }
1892 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001893 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001894 }
1895 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001896
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001897 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001898 }
1899
/* req_lock is held here on both branches of the two_primaries check. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001900 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001901 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902
/* Protocol specific handling: C takes an unacked count, B sends
 * P_RECV_ACK right away, A does nothing here. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001903 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904 case DRBD_PROT_C:
1905 inc_unacked(mdev);
1906 /* corresponding dec_unacked() in e_end_block()
1907 * respective _drbd_clear_done_ee */
1908 break;
1909 case DRBD_PROT_B:
1910 /* I really don't like it that the receiver thread
1911 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001912 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913 break;
1914 case DRBD_PROT_A:
1915 /* nothing to do */
1916 break;
1917 }
1918
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001919 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001921 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1922 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1923 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1924 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001925 }
1926
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001927 if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001928 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929
/* Submit failed: take the request off active_ee, remove its interval and
 * complete the activity log I/O if it was begun above. */
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001930 /* don't care for the reason here */
1931 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001932 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001933 list_del(&peer_req->w.list);
1934 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001935 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001936 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1937 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001938
/* Also reached directly when drbd_wait_peer_seq() fails or a signal is
 * pending in the conflict loop; the request is on no list at that point. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001940 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001941 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001942 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001943 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944}
1945
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001946/* We may throttle resync, if the lower device seems to be busy,
1947 * and current sync rate is above c_min_rate.
1948 *
1949 * To decide whether or not the lower device is busy, we use a scheme similar
1950 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1951 * (more than 64 sectors) of activity we cannot account for with our own resync
1952 * activity, it obviously is "busy".
1953 *
1954 * The current sync rate used here uses only the most recent two step marks,
1955 * to have a short time average so we can react faster.
1956 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001957int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001958{
1959 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1960 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001961 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001962 int curr_events;
1963 int throttle = 0;
1964
1965 /* feature disabled? */
1966 if (mdev->sync_conf.c_min_rate == 0)
1967 return 0;
1968
Philipp Reisnere3555d82010-11-07 15:56:29 +01001969 spin_lock_irq(&mdev->al_lock);
1970 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1971 if (tmp) {
1972 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1973 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1974 spin_unlock_irq(&mdev->al_lock);
1975 return 0;
1976 }
1977 /* Do not slow down if app IO is already waiting for this extent */
1978 }
1979 spin_unlock_irq(&mdev->al_lock);
1980
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001981 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1982 (int)part_stat_read(&disk->part0, sectors[1]) -
1983 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001984
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001985 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1986 unsigned long rs_left;
1987 int i;
1988
1989 mdev->rs_last_events = curr_events;
1990
1991 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1992 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001993 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1994
1995 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
1996 rs_left = mdev->ov_left;
1997 else
1998 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001999
2000 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2001 if (!dt)
2002 dt++;
2003 db = mdev->rs_mark_left[i] - rs_left;
2004 dbdt = Bit2KB(db/dt);
2005
2006 if (dbdt > mdev->sync_conf.c_min_rate)
2007 throttle = 1;
2008 }
2009 return throttle;
2010}
2011
2012
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002013static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2014 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015{
2016 sector_t sector;
2017 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002018 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002020 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002022 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023
2024 sector = be64_to_cpu(p->sector);
2025 size = be32_to_cpu(p->blksize);
2026
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002027 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2029 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002030 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 }
2032 if (sector + (size>>9) > capacity) {
2033 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2034 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002035 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002036 }
2037
2038 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002039 verb = 1;
2040 switch (cmd) {
2041 case P_DATA_REQUEST:
2042 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2043 break;
2044 case P_RS_DATA_REQUEST:
2045 case P_CSUM_RS_REQUEST:
2046 case P_OV_REQUEST:
2047 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2048 break;
2049 case P_OV_REPLY:
2050 verb = 0;
2051 dec_rs_pending(mdev);
2052 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2053 break;
2054 default:
2055 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2056 cmdname(cmd));
2057 }
2058 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059 dev_err(DEV, "Can not satisfy peer's read request, "
2060 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002061
Lars Ellenberga821cc42010-09-06 12:31:37 +02002062 /* drain possibly payload */
2063 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002064 }
2065
2066 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2067 * "criss-cross" setup, that might cause write-out on some other DRBD,
2068 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002069 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2070 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002072 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002073 }
2074
Philipp Reisner02918be2010-08-20 14:35:10 +02002075 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002076 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002077 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002079 /* application IO, don't drbd_rs_begin_io */
2080 goto submit;
2081
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002083 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002084 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002085 /* used in the sector offset progress display */
2086 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 break;
2088
2089 case P_OV_REPLY:
2090 case P_CSUM_RS_REQUEST:
2091 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002092 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2093 if (!di)
2094 goto out_free_e;
2095
2096 di->digest_size = digest_size;
2097 di->digest = (((char *)di)+sizeof(struct digest_info));
2098
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002099 peer_req->digest = di;
2100 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002101
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002102 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002103 goto out_free_e;
2104
Philipp Reisner02918be2010-08-20 14:35:10 +02002105 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002106 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002107 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002108 /* used in the sector offset progress display */
2109 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002110 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002111 /* track progress, we may need to throttle */
2112 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002113 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002114 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002115 /* drbd_rs_begin_io done when we sent this request,
2116 * but accounting still needs to be done. */
2117 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002118 }
2119 break;
2120
2121 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002123 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002124 unsigned long now = jiffies;
2125 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002126 mdev->ov_start_sector = sector;
2127 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002128 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2129 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002130 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2131 mdev->rs_mark_left[i] = mdev->ov_left;
2132 mdev->rs_mark_time[i] = now;
2133 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134 dev_info(DEV, "Online Verify start sector: %llu\n",
2135 (unsigned long long)sector);
2136 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002137 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002139 break;
2140
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 default:
2142 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002143 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002144 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002145 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002146 }
2147
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002148 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2149 * wrt the receiver, but it is not as straightforward as it may seem.
2150 * Various places in the resync start and stop logic assume resync
2151 * requests are processed in order, requeuing this on the worker thread
2152 * introduces a bunch of new code for synchronization between threads.
2153 *
2154 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2155 * "forever", throttling after drbd_rs_begin_io will lock that extent
2156 * for application writes for the same time. For now, just throttle
2157 * here, where the rest of the code expects the receiver to sleep for
2158 * a while, anyways.
2159 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002161 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2162 * this defers syncer requests for some time, before letting at least
2163 * on request through. The resync controller on the receiving side
2164 * will adapt to the incoming rate accordingly.
2165 *
2166 * We cannot throttle here if remote is Primary/SyncTarget:
2167 * we would also throttle its application reads.
2168 * In that case, throttling is done on the SyncTarget only.
2169 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002170 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2171 schedule_timeout_uninterruptible(HZ/10);
2172 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002173 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002175submit_for_resync:
2176 atomic_add(size >> 9, &mdev->rs_sect_ev);
2177
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002178submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002179 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002180 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002181 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002182 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002184 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002185 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002186
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002187 /* don't care for the reason here */
2188 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002189 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002190 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002191 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002192 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2193
Philipp Reisnerb411b362009-09-25 16:07:19 -07002194out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002196 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002197 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198}
2199
2200static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2201{
2202 int self, peer, rv = -100;
2203 unsigned long ch_self, ch_peer;
2204
2205 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2206 peer = mdev->p_uuid[UI_BITMAP] & 1;
2207
2208 ch_peer = mdev->p_uuid[UI_SIZE];
2209 ch_self = mdev->comm_bm_set;
2210
Philipp Reisner89e58e72011-01-19 13:12:45 +01002211 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002212 case ASB_CONSENSUS:
2213 case ASB_DISCARD_SECONDARY:
2214 case ASB_CALL_HELPER:
2215 dev_err(DEV, "Configuration error.\n");
2216 break;
2217 case ASB_DISCONNECT:
2218 break;
2219 case ASB_DISCARD_YOUNGER_PRI:
2220 if (self == 0 && peer == 1) {
2221 rv = -1;
2222 break;
2223 }
2224 if (self == 1 && peer == 0) {
2225 rv = 1;
2226 break;
2227 }
2228 /* Else fall through to one of the other strategies... */
2229 case ASB_DISCARD_OLDER_PRI:
2230 if (self == 0 && peer == 1) {
2231 rv = 1;
2232 break;
2233 }
2234 if (self == 1 && peer == 0) {
2235 rv = -1;
2236 break;
2237 }
2238 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002239 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 "Using discard-least-changes instead\n");
2241 case ASB_DISCARD_ZERO_CHG:
2242 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002243 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 ? -1 : 1;
2245 break;
2246 } else {
2247 if (ch_peer == 0) { rv = 1; break; }
2248 if (ch_self == 0) { rv = -1; break; }
2249 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002250 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002251 break;
2252 case ASB_DISCARD_LEAST_CHG:
2253 if (ch_self < ch_peer)
2254 rv = -1;
2255 else if (ch_self > ch_peer)
2256 rv = 1;
2257 else /* ( ch_self == ch_peer ) */
2258 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002259 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002260 ? -1 : 1;
2261 break;
2262 case ASB_DISCARD_LOCAL:
2263 rv = -1;
2264 break;
2265 case ASB_DISCARD_REMOTE:
2266 rv = 1;
2267 }
2268
2269 return rv;
2270}
2271
2272static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2273{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002274 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275
Philipp Reisner89e58e72011-01-19 13:12:45 +01002276 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002277 case ASB_DISCARD_YOUNGER_PRI:
2278 case ASB_DISCARD_OLDER_PRI:
2279 case ASB_DISCARD_LEAST_CHG:
2280 case ASB_DISCARD_LOCAL:
2281 case ASB_DISCARD_REMOTE:
2282 dev_err(DEV, "Configuration error.\n");
2283 break;
2284 case ASB_DISCONNECT:
2285 break;
2286 case ASB_CONSENSUS:
2287 hg = drbd_asb_recover_0p(mdev);
2288 if (hg == -1 && mdev->state.role == R_SECONDARY)
2289 rv = hg;
2290 if (hg == 1 && mdev->state.role == R_PRIMARY)
2291 rv = hg;
2292 break;
2293 case ASB_VIOLENTLY:
2294 rv = drbd_asb_recover_0p(mdev);
2295 break;
2296 case ASB_DISCARD_SECONDARY:
2297 return mdev->state.role == R_PRIMARY ? 1 : -1;
2298 case ASB_CALL_HELPER:
2299 hg = drbd_asb_recover_0p(mdev);
2300 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002301 enum drbd_state_rv rv2;
2302
2303 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002304 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2305 * we might be here in C_WF_REPORT_PARAMS which is transient.
2306 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002307 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2308 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002309 drbd_khelper(mdev, "pri-lost-after-sb");
2310 } else {
2311 dev_warn(DEV, "Successfully gave up primary role.\n");
2312 rv = hg;
2313 }
2314 } else
2315 rv = hg;
2316 }
2317
2318 return rv;
2319}
2320
2321static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2322{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002323 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002324
Philipp Reisner89e58e72011-01-19 13:12:45 +01002325 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326 case ASB_DISCARD_YOUNGER_PRI:
2327 case ASB_DISCARD_OLDER_PRI:
2328 case ASB_DISCARD_LEAST_CHG:
2329 case ASB_DISCARD_LOCAL:
2330 case ASB_DISCARD_REMOTE:
2331 case ASB_CONSENSUS:
2332 case ASB_DISCARD_SECONDARY:
2333 dev_err(DEV, "Configuration error.\n");
2334 break;
2335 case ASB_VIOLENTLY:
2336 rv = drbd_asb_recover_0p(mdev);
2337 break;
2338 case ASB_DISCONNECT:
2339 break;
2340 case ASB_CALL_HELPER:
2341 hg = drbd_asb_recover_0p(mdev);
2342 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002343 enum drbd_state_rv rv2;
2344
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2346 * we might be here in C_WF_REPORT_PARAMS which is transient.
2347 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002348 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2349 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350 drbd_khelper(mdev, "pri-lost-after-sb");
2351 } else {
2352 dev_warn(DEV, "Successfully gave up primary role.\n");
2353 rv = hg;
2354 }
2355 } else
2356 rv = hg;
2357 }
2358
2359 return rv;
2360}
2361
2362static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2363 u64 bits, u64 flags)
2364{
2365 if (!uuid) {
2366 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2367 return;
2368 }
2369 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2370 text,
2371 (unsigned long long)uuid[UI_CURRENT],
2372 (unsigned long long)uuid[UI_BITMAP],
2373 (unsigned long long)uuid[UI_HISTORY_START],
2374 (unsigned long long)uuid[UI_HISTORY_END],
2375 (unsigned long long)bits,
2376 (unsigned long long)flags);
2377}
2378
2379/*
2380 100 after split brain try auto recover
2381 2 C_SYNC_SOURCE set BitMap
2382 1 C_SYNC_SOURCE use BitMap
2383 0 no Sync
2384 -1 C_SYNC_TARGET use BitMap
2385 -2 C_SYNC_TARGET set BitMap
2386 -100 after split brain, disconnect
2387-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002388-1091 requires proto 91
2389-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002390 */
2391static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2392{
2393 u64 self, peer;
2394 int i, j;
2395
2396 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2397 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2398
2399 *rule_nr = 10;
2400 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2401 return 0;
2402
2403 *rule_nr = 20;
2404 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2405 peer != UUID_JUST_CREATED)
2406 return -2;
2407
2408 *rule_nr = 30;
2409 if (self != UUID_JUST_CREATED &&
2410 (peer == UUID_JUST_CREATED || peer == (u64)0))
2411 return 2;
2412
2413 if (self == peer) {
2414 int rct, dc; /* roles at crash time */
2415
2416 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2417
Philipp Reisner31890f42011-01-19 14:12:51 +01002418 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002419 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002420
2421 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2422 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2423 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2424 drbd_uuid_set_bm(mdev, 0UL);
2425
2426 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2427 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2428 *rule_nr = 34;
2429 } else {
2430 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2431 *rule_nr = 36;
2432 }
2433
2434 return 1;
2435 }
2436
2437 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2438
Philipp Reisner31890f42011-01-19 14:12:51 +01002439 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002440 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002441
2442 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2443 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2444 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2445
2446 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2447 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2448 mdev->p_uuid[UI_BITMAP] = 0UL;
2449
2450 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2451 *rule_nr = 35;
2452 } else {
2453 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2454 *rule_nr = 37;
2455 }
2456
2457 return -1;
2458 }
2459
2460 /* Common power [off|failure] */
2461 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2462 (mdev->p_uuid[UI_FLAGS] & 2);
2463 /* lowest bit is set when we were primary,
2464 * next bit (weight 2) is set when peer was primary */
2465 *rule_nr = 40;
2466
2467 switch (rct) {
2468 case 0: /* !self_pri && !peer_pri */ return 0;
2469 case 1: /* self_pri && !peer_pri */ return 1;
2470 case 2: /* !self_pri && peer_pri */ return -1;
2471 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002472 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473 return dc ? -1 : 1;
2474 }
2475 }
2476
2477 *rule_nr = 50;
2478 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2479 if (self == peer)
2480 return -1;
2481
2482 *rule_nr = 51;
2483 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2484 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002485 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002486 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2487 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2488 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002489 /* The last P_SYNC_UUID did not get though. Undo the last start of
2490 resync as sync source modifications of the peer's UUIDs. */
2491
Philipp Reisner31890f42011-01-19 14:12:51 +01002492 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002493 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494
2495 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2496 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002497
2498 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2499 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2500
Philipp Reisnerb411b362009-09-25 16:07:19 -07002501 return -1;
2502 }
2503 }
2504
2505 *rule_nr = 60;
2506 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2507 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2508 peer = mdev->p_uuid[i] & ~((u64)1);
2509 if (self == peer)
2510 return -2;
2511 }
2512
2513 *rule_nr = 70;
2514 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2515 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2516 if (self == peer)
2517 return 1;
2518
2519 *rule_nr = 71;
2520 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2521 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002522 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002523 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2524 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2525 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526 /* The last P_SYNC_UUID did not get though. Undo the last start of
2527 resync as sync source modifications of our UUIDs. */
2528
Philipp Reisner31890f42011-01-19 14:12:51 +01002529 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002530 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531
2532 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2533 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2534
Philipp Reisner4a23f262011-01-11 17:42:17 +01002535 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002536 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2537 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2538
2539 return 1;
2540 }
2541 }
2542
2543
2544 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002545 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002546 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2547 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2548 if (self == peer)
2549 return 2;
2550 }
2551
2552 *rule_nr = 90;
2553 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2554 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2555 if (self == peer && self != ((u64)0))
2556 return 100;
2557
2558 *rule_nr = 100;
2559 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2560 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2561 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2562 peer = mdev->p_uuid[j] & ~((u64)1);
2563 if (self == peer)
2564 return -100;
2565 }
2566 }
2567
2568 return -1000;
2569}
2570
2571/* drbd_sync_handshake() returns the new conn state on success, or
2572 CONN_MASK (-1) on failure.
2573 */
2574static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2575 enum drbd_disk_state peer_disk) __must_hold(local)
2576{
2577 int hg, rule_nr;
2578 enum drbd_conns rv = C_MASK;
2579 enum drbd_disk_state mydisk;
2580
2581 mydisk = mdev->state.disk;
2582 if (mydisk == D_NEGOTIATING)
2583 mydisk = mdev->new_state_tmp.disk;
2584
2585 dev_info(DEV, "drbd_sync_handshake:\n");
2586 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2587 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2588 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2589
2590 hg = drbd_uuid_compare(mdev, &rule_nr);
2591
2592 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2593
2594 if (hg == -1000) {
2595 dev_alert(DEV, "Unrelated data, aborting!\n");
2596 return C_MASK;
2597 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002598 if (hg < -1000) {
2599 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002600 return C_MASK;
2601 }
2602
2603 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2604 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2605 int f = (hg == -100) || abs(hg) == 2;
2606 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2607 if (f)
2608 hg = hg*2;
2609 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2610 hg > 0 ? "source" : "target");
2611 }
2612
Adam Gandelman3a11a482010-04-08 16:48:23 -07002613 if (abs(hg) == 100)
2614 drbd_khelper(mdev, "initial-split-brain");
2615
Philipp Reisner89e58e72011-01-19 13:12:45 +01002616 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002617 int pcount = (mdev->state.role == R_PRIMARY)
2618 + (peer_role == R_PRIMARY);
2619 int forced = (hg == -100);
2620
2621 switch (pcount) {
2622 case 0:
2623 hg = drbd_asb_recover_0p(mdev);
2624 break;
2625 case 1:
2626 hg = drbd_asb_recover_1p(mdev);
2627 break;
2628 case 2:
2629 hg = drbd_asb_recover_2p(mdev);
2630 break;
2631 }
2632 if (abs(hg) < 100) {
2633 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2634 "automatically solved. Sync from %s node\n",
2635 pcount, (hg < 0) ? "peer" : "this");
2636 if (forced) {
2637 dev_warn(DEV, "Doing a full sync, since"
2638 " UUIDs where ambiguous.\n");
2639 hg = hg*2;
2640 }
2641 }
2642 }
2643
2644 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002645 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002646 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002647 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648 hg = 1;
2649
2650 if (abs(hg) < 100)
2651 dev_warn(DEV, "Split-Brain detected, manually solved. "
2652 "Sync from %s node\n",
2653 (hg < 0) ? "peer" : "this");
2654 }
2655
2656 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002657 /* FIXME this log message is not correct if we end up here
2658 * after an attempted attach on a diskless node.
2659 * We just refuse to attach -- well, we drop the "connection"
2660 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002661 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662 drbd_khelper(mdev, "split-brain");
2663 return C_MASK;
2664 }
2665
2666 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2667 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2668 return C_MASK;
2669 }
2670
2671 if (hg < 0 && /* by intention we do not use mydisk here. */
2672 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002673 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 case ASB_CALL_HELPER:
2675 drbd_khelper(mdev, "pri-lost");
2676 /* fall through */
2677 case ASB_DISCONNECT:
2678 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2679 return C_MASK;
2680 case ASB_VIOLENTLY:
2681 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2682 "assumption\n");
2683 }
2684 }
2685
Philipp Reisner89e58e72011-01-19 13:12:45 +01002686 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002687 if (hg == 0)
2688 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2689 else
2690 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2691 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2692 abs(hg) >= 2 ? "full" : "bit-map based");
2693 return C_MASK;
2694 }
2695
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696 if (abs(hg) >= 2) {
2697 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002698 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2699 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002700 return C_MASK;
2701 }
2702
2703 if (hg > 0) { /* become sync source. */
2704 rv = C_WF_BITMAP_S;
2705 } else if (hg < 0) { /* become sync target */
2706 rv = C_WF_BITMAP_T;
2707 } else {
2708 rv = C_CONNECTED;
2709 if (drbd_bm_total_weight(mdev)) {
2710 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2711 drbd_bm_total_weight(mdev));
2712 }
2713 }
2714
2715 return rv;
2716}
2717
2718/* returns 1 if invalid */
2719static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2720{
2721 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2722 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2723 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2724 return 0;
2725
2726 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2727 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2728 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2729 return 1;
2730
2731 /* everything else is valid if they are equal on both sides. */
2732 if (peer == self)
2733 return 0;
2734
2735 /* everything es is invalid. */
2736 return 1;
2737}
2738
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002739static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2740 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002742 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002744 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2746
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 p_proto = be32_to_cpu(p->protocol);
2748 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2749 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2750 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002752 cf = be32_to_cpu(p->conn_flags);
2753 p_want_lose = cf & CF_WANT_LOSE;
2754
2755 clear_bit(CONN_DRY_RUN, &mdev->flags);
2756
2757 if (cf & CF_DRY_RUN)
2758 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759
Philipp Reisner89e58e72011-01-19 13:12:45 +01002760 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002761 dev_err(DEV, "incompatible communication protocols\n");
2762 goto disconnect;
2763 }
2764
Philipp Reisner89e58e72011-01-19 13:12:45 +01002765 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2767 goto disconnect;
2768 }
2769
Philipp Reisner89e58e72011-01-19 13:12:45 +01002770 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2772 goto disconnect;
2773 }
2774
Philipp Reisner89e58e72011-01-19 13:12:45 +01002775 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2777 goto disconnect;
2778 }
2779
Philipp Reisner89e58e72011-01-19 13:12:45 +01002780 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002781 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2782 goto disconnect;
2783 }
2784
Philipp Reisner89e58e72011-01-19 13:12:45 +01002785 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002786 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2787 goto disconnect;
2788 }
2789
Philipp Reisner31890f42011-01-19 14:12:51 +01002790 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002791 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002792
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002793 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002794 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795
2796 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2797 if (strcmp(p_integrity_alg, my_alg)) {
2798 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2799 goto disconnect;
2800 }
2801 dev_info(DEV, "data-integrity-alg: %s\n",
2802 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2803 }
2804
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002805 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002806
2807disconnect:
2808 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002809 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810}
2811
2812/* helper function
2813 * input: alg name, feature name
2814 * return: NULL (alg name was "")
2815 * ERR_PTR(error) if something goes wrong
2816 * or the crypto hash ptr, if it worked out ok. */
2817struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2818 const char *alg, const char *name)
2819{
2820 struct crypto_hash *tfm;
2821
2822 if (!alg[0])
2823 return NULL;
2824
2825 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2826 if (IS_ERR(tfm)) {
2827 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2828 alg, name, PTR_ERR(tfm));
2829 return tfm;
2830 }
2831 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2832 crypto_free_hash(tfm);
2833 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2834 return ERR_PTR(-EINVAL);
2835 }
2836 return tfm;
2837}
2838
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002839static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2840 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002841{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002842 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002843 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844 unsigned int header_size, data_size, exp_max_sz;
2845 struct crypto_hash *verify_tfm = NULL;
2846 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002847 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002848 int *rs_plan_s = NULL;
2849 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002850
2851 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2852 : apv == 88 ? sizeof(struct p_rs_param)
2853 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002854 : apv <= 94 ? sizeof(struct p_rs_param_89)
2855 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002856
Philipp Reisner02918be2010-08-20 14:35:10 +02002857 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002859 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002860 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002861 }
2862
2863 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002864 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002865 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002866 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002867 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002868 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002869 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002870 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002871 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002872 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 D_ASSERT(data_size == 0);
2874 }
2875
2876 /* initialize verify_alg and csums_alg */
2877 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2878
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002879 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002880 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002881
2882 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2883
2884 if (apv >= 88) {
2885 if (apv == 88) {
2886 if (data_size > SHARED_SECRET_MAX) {
2887 dev_err(DEV, "verify-alg too long, "
2888 "peer wants %u, accepting only %u byte\n",
2889 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002890 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002891 }
2892
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002893 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002894 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895
2896 /* we expect NUL terminated string */
2897 /* but just in case someone tries to be evil */
2898 D_ASSERT(p->verify_alg[data_size-1] == 0);
2899 p->verify_alg[data_size-1] = 0;
2900
2901 } else /* apv >= 89 */ {
2902 /* we still expect NUL terminated strings */
2903 /* but just in case someone tries to be evil */
2904 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2905 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2906 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2907 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2908 }
2909
2910 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2911 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2912 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2913 mdev->sync_conf.verify_alg, p->verify_alg);
2914 goto disconnect;
2915 }
2916 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2917 p->verify_alg, "verify-alg");
2918 if (IS_ERR(verify_tfm)) {
2919 verify_tfm = NULL;
2920 goto disconnect;
2921 }
2922 }
2923
2924 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2925 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2926 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2927 mdev->sync_conf.csums_alg, p->csums_alg);
2928 goto disconnect;
2929 }
2930 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2931 p->csums_alg, "csums-alg");
2932 if (IS_ERR(csums_tfm)) {
2933 csums_tfm = NULL;
2934 goto disconnect;
2935 }
2936 }
2937
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002938 if (apv > 94) {
2939 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2940 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2941 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2942 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2943 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002944
2945 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2946 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2947 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2948 if (!rs_plan_s) {
2949 dev_err(DEV, "kmalloc of fifo_buffer failed");
2950 goto disconnect;
2951 }
2952 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002953 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002954
2955 spin_lock(&mdev->peer_seq_lock);
2956 /* lock against drbd_nl_syncer_conf() */
2957 if (verify_tfm) {
2958 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2959 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2960 crypto_free_hash(mdev->verify_tfm);
2961 mdev->verify_tfm = verify_tfm;
2962 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2963 }
2964 if (csums_tfm) {
2965 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2966 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2967 crypto_free_hash(mdev->csums_tfm);
2968 mdev->csums_tfm = csums_tfm;
2969 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2970 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002971 if (fifo_size != mdev->rs_plan_s.size) {
2972 kfree(mdev->rs_plan_s.values);
2973 mdev->rs_plan_s.values = rs_plan_s;
2974 mdev->rs_plan_s.size = fifo_size;
2975 mdev->rs_planed = 0;
2976 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977 spin_unlock(&mdev->peer_seq_lock);
2978 }
2979
2980 return ok;
2981disconnect:
2982 /* just for completeness: actually not needed,
2983 * as this is not reached if csums_tfm was ok. */
2984 crypto_free_hash(csums_tfm);
2985 /* but free the verify_tfm again, if csums_tfm did not work out */
2986 crypto_free_hash(verify_tfm);
2987 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002988 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002989}
2990
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991/* warn if the arguments differ by more than 12.5% */
2992static void warn_if_differ_considerably(struct drbd_conf *mdev,
2993 const char *s, sector_t a, sector_t b)
2994{
2995 sector_t d;
2996 if (a == 0 || b == 0)
2997 return;
2998 d = (a > b) ? (a - b) : (b - a);
2999 if (d > (a>>3) || d > (b>>3))
3000 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3001 (unsigned long long)a, (unsigned long long)b);
3002}
3003
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003004static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3005 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003006{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003007 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003009 sector_t p_size, p_usize, my_usize;
3010 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003011 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 p_size = be64_to_cpu(p->d_size);
3014 p_usize = be64_to_cpu(p->u_size);
3015
3016 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3017 dev_err(DEV, "some backing storage is needed\n");
3018 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003019 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003020 }
3021
3022 /* just store the peer's disk size for now.
3023 * we still need to figure out whether we accept that. */
3024 mdev->p_size = p_size;
3025
Philipp Reisnerb411b362009-09-25 16:07:19 -07003026 if (get_ldev(mdev)) {
3027 warn_if_differ_considerably(mdev, "lower level device sizes",
3028 p_size, drbd_get_max_capacity(mdev->ldev));
3029 warn_if_differ_considerably(mdev, "user requested size",
3030 p_usize, mdev->ldev->dc.disk_size);
3031
3032 /* if this is the first connect, or an otherwise expected
3033 * param exchange, choose the minimum */
3034 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3035 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3036 p_usize);
3037
3038 my_usize = mdev->ldev->dc.disk_size;
3039
3040 if (mdev->ldev->dc.disk_size != p_usize) {
3041 mdev->ldev->dc.disk_size = p_usize;
3042 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3043 (unsigned long)mdev->ldev->dc.disk_size);
3044 }
3045
3046 /* Never shrink a device with usable data during connect.
3047 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003048 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003049 drbd_get_capacity(mdev->this_bdev) &&
3050 mdev->state.disk >= D_OUTDATED &&
3051 mdev->state.conn < C_CONNECTED) {
3052 dev_err(DEV, "The peer's disk size is too small!\n");
3053 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3054 mdev->ldev->dc.disk_size = my_usize;
3055 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003056 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003057 }
3058 put_ldev(mdev);
3059 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003060
Philipp Reisnere89b5912010-03-24 17:11:33 +01003061 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003063 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003064 put_ldev(mdev);
3065 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003066 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003067 drbd_md_sync(mdev);
3068 } else {
3069 /* I am diskless, need to accept the peer's size. */
3070 drbd_set_my_capacity(mdev, p_size);
3071 }
3072
Philipp Reisner99432fc2011-05-20 16:39:13 +02003073 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3074 drbd_reconsider_max_bio_size(mdev);
3075
Philipp Reisnerb411b362009-09-25 16:07:19 -07003076 if (get_ldev(mdev)) {
3077 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3078 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3079 ldsc = 1;
3080 }
3081
Philipp Reisnerb411b362009-09-25 16:07:19 -07003082 put_ldev(mdev);
3083 }
3084
3085 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3086 if (be64_to_cpu(p->c_size) !=
3087 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3088 /* we have different sizes, probably peer
3089 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003090 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091 }
3092 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3093 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3094 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003095 mdev->state.disk >= D_INCONSISTENT) {
3096 if (ddsf & DDSF_NO_RESYNC)
3097 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3098 else
3099 resync_after_online_grow(mdev);
3100 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3102 }
3103 }
3104
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003105 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106}
3107
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003108static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3109 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003111 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003112 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003113 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3116
3117 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3118 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3119
3120 kfree(mdev->p_uuid);
3121 mdev->p_uuid = p_uuid;
3122
3123 if (mdev->state.conn < C_CONNECTED &&
3124 mdev->state.disk < D_INCONSISTENT &&
3125 mdev->state.role == R_PRIMARY &&
3126 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3127 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3128 (unsigned long long)mdev->ed_uuid);
3129 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003130 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 }
3132
3133 if (get_ldev(mdev)) {
3134 int skip_initial_sync =
3135 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003136 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3138 (p_uuid[UI_FLAGS] & 8);
3139 if (skip_initial_sync) {
3140 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3141 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003142 "clear_n_write from receive_uuids",
3143 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3145 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3146 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3147 CS_VERBOSE, NULL);
3148 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003149 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 }
3151 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003152 } else if (mdev->state.disk < D_INCONSISTENT &&
3153 mdev->state.role == R_PRIMARY) {
3154 /* I am a diskless primary, the peer just created a new current UUID
3155 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003156 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 }
3158
3159 /* Before we test for the disk state, we should wait until an eventually
3160 ongoing cluster wide state change is finished. That is important if
3161 we are primary and are detaching from our disk. We need to see the
3162 new disk state... */
3163 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3164 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003165 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3166
3167 if (updated_uuids)
3168 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003170 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171}
3172
3173/**
3174 * convert_state() - Converts the peer's view of the cluster state to our point of view
3175 * @ps: The state as seen by the peer.
3176 */
3177static union drbd_state convert_state(union drbd_state ps)
3178{
3179 union drbd_state ms;
3180
3181 static enum drbd_conns c_tab[] = {
3182 [C_CONNECTED] = C_CONNECTED,
3183
3184 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3185 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3186 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3187 [C_VERIFY_S] = C_VERIFY_T,
3188 [C_MASK] = C_MASK,
3189 };
3190
3191 ms.i = ps.i;
3192
3193 ms.conn = c_tab[ps.conn];
3194 ms.peer = ps.role;
3195 ms.role = ps.peer;
3196 ms.pdsk = ps.disk;
3197 ms.disk = ps.pdsk;
3198 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3199
3200 return ms;
3201}
3202
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003203static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3204 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003205{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003206 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003208 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 mask.i = be32_to_cpu(p->mask);
3211 val.i = be32_to_cpu(p->val);
3212
Philipp Reisner25703f82011-02-07 14:35:25 +01003213 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3215 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003216 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217 }
3218
3219 mask = convert_state(mask);
3220 val = convert_state(val);
3221
3222 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3223
3224 drbd_send_sr_reply(mdev, rv);
3225 drbd_md_sync(mdev);
3226
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003227 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228}
3229
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003230static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3231 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003233 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003234 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003236 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237 int rv;
3238
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239 peer_state.i = be32_to_cpu(p->state);
3240
3241 real_peer_disk = peer_state.disk;
3242 if (peer_state.disk == D_NEGOTIATING) {
3243 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3244 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3245 }
3246
Philipp Reisner87eeee42011-01-19 14:16:30 +01003247 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003249 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003250 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003252 /* peer says his disk is uptodate, while we think it is inconsistent,
3253 * and this happens while we think we have a sync going on. */
3254 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3255 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3256 /* If we are (becoming) SyncSource, but peer is still in sync
3257 * preparation, ignore its uptodate-ness to avoid flapping, it
3258 * will change to inconsistent once the peer reaches active
3259 * syncing states.
3260 * It may have changed syncer-paused flags, however, so we
3261 * cannot ignore this completely. */
3262 if (peer_state.conn > C_CONNECTED &&
3263 peer_state.conn < C_SYNC_SOURCE)
3264 real_peer_disk = D_INCONSISTENT;
3265
3266 /* if peer_state changes to connected at the same time,
3267 * it explicitly notifies us that it finished resync.
3268 * Maybe we should finish it up, too? */
3269 else if (os.conn >= C_SYNC_SOURCE &&
3270 peer_state.conn == C_CONNECTED) {
3271 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3272 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003273 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003274 }
3275 }
3276
3277 /* peer says his disk is inconsistent, while we think it is uptodate,
3278 * and this happens while the peer still thinks we have a sync going on,
3279 * but we think we are already done with the sync.
3280 * We ignore this to avoid flapping pdsk.
3281 * This should not happen, if the peer is a recent version of drbd. */
3282 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3283 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3284 real_peer_disk = D_UP_TO_DATE;
3285
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003286 if (ns.conn == C_WF_REPORT_PARAMS)
3287 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288
Philipp Reisner67531712010-10-27 12:21:30 +02003289 if (peer_state.conn == C_AHEAD)
3290 ns.conn = C_BEHIND;
3291
Philipp Reisnerb411b362009-09-25 16:07:19 -07003292 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3293 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3294 int cr; /* consider resync */
3295
3296 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003297 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003298 /* if we had an established connection
3299 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003300 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003302 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303 /* if we have both been inconsistent, and the peer has been
3304 * forced to be UpToDate with --overwrite-data */
3305 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3306 /* if we had been plain connected, and the admin requested to
3307 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003308 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 (peer_state.conn >= C_STARTING_SYNC_S &&
3310 peer_state.conn <= C_WF_BITMAP_T));
3311
3312 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003313 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003314
3315 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003316 if (ns.conn == C_MASK) {
3317 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003318 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003319 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003320 } else if (peer_state.disk == D_NEGOTIATING) {
3321 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3322 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003323 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003324 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003325 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003326 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003327 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003329 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003330 }
3331 }
3332 }
3333
Philipp Reisner87eeee42011-01-19 14:16:30 +01003334 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003335 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003336 goto retry;
3337 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003338 ns.peer = peer_state.role;
3339 ns.pdsk = real_peer_disk;
3340 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003341 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003343 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3344 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003345 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003346 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003347 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003348 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003349 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3350 tl_clear(mdev);
3351 drbd_uuid_new_current(mdev);
3352 clear_bit(NEW_CUR_UUID, &mdev->flags);
3353 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003354 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003355 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003356 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003357 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003358 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003359
3360 if (rv < SS_SUCCESS) {
3361 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003362 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363 }
3364
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003365 if (os.conn > C_WF_REPORT_PARAMS) {
3366 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 peer_state.disk != D_NEGOTIATING ) {
3368 /* we want resync, peer has not yet decided to sync... */
3369 /* Nowadays only used when forcing a node into primary role and
3370 setting its disk to UpToDate with that */
3371 drbd_send_uuids(mdev);
3372 drbd_send_state(mdev);
3373 }
3374 }
3375
Philipp Reisner89e58e72011-01-19 13:12:45 +01003376 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003377
3378 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3379
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003380 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381}
3382
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003383static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3384 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003385{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003386 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387
3388 wait_event(mdev->misc_wait,
3389 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003390 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003391 mdev->state.conn < C_CONNECTED ||
3392 mdev->state.disk < D_NEGOTIATING);
3393
3394 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3395
Philipp Reisnerb411b362009-09-25 16:07:19 -07003396 /* Here the _drbd_uuid_ functions are right, current should
3397 _not_ be rotated into the history */
3398 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3399 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3400 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3401
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003402 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 drbd_start_resync(mdev, C_SYNC_TARGET);
3404
3405 put_ldev(mdev);
3406 } else
3407 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3408
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003409 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410}
3411
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003412/**
3413 * receive_bitmap_plain
3414 *
3415 * Return 0 when done, 1 when another iteration is needed, and a negative error
3416 * code upon failure.
3417 */
3418static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003419receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3420 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421{
3422 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3423 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003424 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425
Philipp Reisner02918be2010-08-20 14:35:10 +02003426 if (want != data_size) {
3427 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003428 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429 }
3430 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003431 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003432 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003433 if (err != want) {
3434 if (err >= 0)
3435 err = -EIO;
3436 return err;
3437 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438
3439 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3440
3441 c->word_offset += num_words;
3442 c->bit_offset = c->word_offset * BITS_PER_LONG;
3443 if (c->bit_offset > c->bm_bits)
3444 c->bit_offset = c->bm_bits;
3445
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003446 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003447}
3448
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003449/**
3450 * recv_bm_rle_bits
3451 *
3452 * Return 0 when done, 1 when another iteration is needed, and a negative error
3453 * code upon failure.
3454 */
3455static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456recv_bm_rle_bits(struct drbd_conf *mdev,
3457 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003458 struct bm_xfer_ctx *c,
3459 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460{
3461 struct bitstream bs;
3462 u64 look_ahead;
3463 u64 rl;
3464 u64 tmp;
3465 unsigned long s = c->bit_offset;
3466 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003467 int toggle = DCBP_get_start(p);
3468 int have;
3469 int bits;
3470
3471 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3472
3473 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3474 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003475 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003476
3477 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3478 bits = vli_decode_bits(&rl, look_ahead);
3479 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003480 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481
3482 if (toggle) {
3483 e = s + rl -1;
3484 if (e >= c->bm_bits) {
3485 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003486 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003487 }
3488 _drbd_bm_set_bits(mdev, s, e);
3489 }
3490
3491 if (have < bits) {
3492 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3493 have, bits, look_ahead,
3494 (unsigned int)(bs.cur.b - p->code),
3495 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003496 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003497 }
3498 look_ahead >>= bits;
3499 have -= bits;
3500
3501 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3502 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003503 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504 look_ahead |= tmp << have;
3505 have += bits;
3506 }
3507
3508 c->bit_offset = s;
3509 bm_xfer_ctx_bit_to_word_offset(c);
3510
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003511 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512}
3513
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003514/**
3515 * decode_bitmap_c
3516 *
3517 * Return 0 when done, 1 when another iteration is needed, and a negative error
3518 * code upon failure.
3519 */
3520static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003521decode_bitmap_c(struct drbd_conf *mdev,
3522 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003523 struct bm_xfer_ctx *c,
3524 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525{
3526 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003527 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528
3529 /* other variants had been implemented for evaluation,
3530 * but have been dropped as this one turned out to be "best"
3531 * during all our tests. */
3532
3533 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3534 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003535 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536}
3537
3538void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3539 const char *direction, struct bm_xfer_ctx *c)
3540{
3541 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003542 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003543 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3544 + c->bm_words * sizeof(long);
3545 unsigned total = c->bytes[0] + c->bytes[1];
3546 unsigned r;
3547
3548 /* total can not be zero. but just in case: */
3549 if (total == 0)
3550 return;
3551
3552 /* don't report if not compressed */
3553 if (total >= plain)
3554 return;
3555
3556 /* total < plain. check for overflow, still */
3557 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3558 : (1000 * total / plain);
3559
3560 if (r > 1000)
3561 r = 1000;
3562
3563 r = 1000 - r;
3564 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3565 "total %u; compression: %u.%u%%\n",
3566 direction,
3567 c->bytes[1], c->packets[1],
3568 c->bytes[0], c->packets[0],
3569 total, r/10, r % 10);
3570}
3571
3572/* Since we are processing the bitfield from lower addresses to higher,
3573 it does not matter if the process it in 32 bit chunks or 64 bit
3574 chunks as long as it is little endian. (Understand it as byte stream,
3575 beginning with the lowest byte...) If we would use big endian
3576 we would need to process it from the highest address to the lowest,
3577 in order to be agnostic to the 32 vs 64 bits issue.
3578
3579 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003580static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3581 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582{
3583 struct bm_xfer_ctx c;
3584 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003585 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003586 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003587 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003588 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003589
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003590 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3591 /* you are supposed to send additional out-of-sync information
3592 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593
3594 /* maybe we should use some per thread scratch page,
3595 * and allocate that during initial device creation? */
3596 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3597 if (!buffer) {
3598 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3599 goto out;
3600 }
3601
3602 c = (struct bm_xfer_ctx) {
3603 .bm_bits = drbd_bm_bits(mdev),
3604 .bm_words = drbd_bm_words(mdev),
3605 };
3606
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003607 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003608 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003609 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003610 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003611 /* MAYBE: sanity check that we speak proto >= 90,
3612 * and the feature is enabled! */
3613 struct p_compressed_bm *p;
3614
Philipp Reisner02918be2010-08-20 14:35:10 +02003615 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003616 dev_err(DEV, "ReportCBitmap packet too large\n");
3617 goto out;
3618 }
3619 /* use the page buff */
3620 p = buffer;
3621 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003622 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003624 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3625 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003626 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003628 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003630 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003631 goto out;
3632 }
3633
Philipp Reisner02918be2010-08-20 14:35:10 +02003634 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003635 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003636
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003637 if (err <= 0) {
3638 if (err < 0)
3639 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003641 }
Philipp Reisner77351055b2011-02-07 17:24:26 +01003642 if (!drbd_recv_header(mdev, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003643 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003644 cmd = pi.cmd;
3645 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003646 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647
3648 INFO_bm_xfer_stats(mdev, "receive", &c);
3649
3650 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003651 enum drbd_state_rv rv;
3652
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 ok = !drbd_send_bitmap(mdev);
3654 if (!ok)
3655 goto out;
3656 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003657 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3658 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003659 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3660 /* admin may have requested C_DISCONNECTING,
3661 * other threads may have noticed network errors */
3662 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3663 drbd_conn_str(mdev->state.conn));
3664 }
3665
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003666 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003668 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3670 drbd_start_resync(mdev, C_SYNC_SOURCE);
3671 free_page((unsigned long) buffer);
3672 return ok;
3673}
3674
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003675static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3676 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003677{
3678 /* TODO zero copy sink :) */
3679 static char sink[128];
3680 int size, want, r;
3681
Philipp Reisner02918be2010-08-20 14:35:10 +02003682 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3683 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684
Philipp Reisner02918be2010-08-20 14:35:10 +02003685 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003686 while (size > 0) {
3687 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003688 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003689 if (!expect(r > 0))
3690 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691 size -= r;
3692 }
3693 return size == 0;
3694}
3695
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003696static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3697 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699 /* Make sure we've acked all the TCP data associated
3700 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003701 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003703 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003704}
3705
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003706static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3707 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003708{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003709 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003710
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003711 switch (mdev->state.conn) {
3712 case C_WF_SYNC_UUID:
3713 case C_WF_BITMAP_T:
3714 case C_BEHIND:
3715 break;
3716 default:
3717 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3718 drbd_conn_str(mdev->state.conn));
3719 }
3720
Philipp Reisner73a01a12010-10-27 14:33:00 +02003721 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3722
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003723 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003724}
3725
/*
 * Signature of a data socket packet handler: gets the device, the packet
 * type, and the number of bytes still to be received for this packet.
 * drbdd() treats a zero return value as failure.
 */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *mdev,
				  enum drbd_packet cmd,
				  unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003728
Philipp Reisner02918be2010-08-20 14:35:10 +02003729struct data_cmd {
3730 int expect_payload;
3731 size_t pkt_size;
3732 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733};
3734
Philipp Reisner02918be2010-08-20 14:35:10 +02003735static struct data_cmd drbd_cmd_handler[] = {
3736 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3737 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3738 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3739 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003740 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3741 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3742 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003743 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3744 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003745 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3746 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003747 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3748 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3749 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3750 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3751 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3752 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3753 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3754 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3755 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3756 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003757 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003758 /* anything missing from this table is in
3759 * the asender_tbl, see get_asender_cmd */
3760 [P_MAX_CMD] = { 0, 0, NULL },
3761};
3762
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it must not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769
3770static void drbdd(struct drbd_conf *mdev)
3771{
Philipp Reisnerc0129492011-01-19 16:58:16 +01003772 struct p_header *header = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003773 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003774 size_t shs; /* sub header size */
3775 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003777 while (get_t_state(&mdev->tconn->receiver) == RUNNING) {
Philipp Reisnerbc31fe32011-02-07 11:14:38 +01003778 drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver);
Philipp Reisner77351055b2011-02-07 17:24:26 +01003779 if (!drbd_recv_header(mdev, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003780 goto err_out;
3781
Philipp Reisner77351055b2011-02-07 17:24:26 +01003782 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
3783 dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003784 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003785 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003786
Philipp Reisner77351055b2011-02-07 17:24:26 +01003787 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3788 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
3789 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003790 goto err_out;
3791 }
3792
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003793 if (shs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003794 rv = drbd_recv(mdev->tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003795 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003796 if (!signal_pending(current))
3797 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003798 goto err_out;
3799 }
3800 }
3801
Philipp Reisner77351055b2011-02-07 17:24:26 +01003802 rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003803
3804 if (unlikely(!rv)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805 dev_err(DEV, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003806 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003807 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003808 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810
Philipp Reisner02918be2010-08-20 14:35:10 +02003811 if (0) {
3812 err_out:
3813 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814 }
Lars Ellenberg856c50c2010-10-14 13:37:40 +02003815 /* If we leave here, we probably want to update at least the
3816 * "Connected" indicator on stable storage. Do so explicitly here. */
3817 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818}
3819
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003820void drbd_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821{
3822 struct drbd_wq_barrier barr;
3823
3824 barr.w.cb = w_prev_work_done;
3825 init_completion(&barr.done);
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003826 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 wait_for_completion(&barr.done);
3828}
3829
3830static void drbd_disconnect(struct drbd_conf *mdev)
3831{
3832 enum drbd_fencing_p fp;
3833 union drbd_state os, ns;
3834 int rv = SS_UNKNOWN_ERROR;
3835 unsigned int i;
3836
3837 if (mdev->state.conn == C_STANDALONE)
3838 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839
3840 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003841 drbd_thread_stop(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842 drbd_free_sock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003843
Philipp Reisner85719572010-07-21 10:20:17 +02003844 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003845 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3847 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3848 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003849 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850
3851 /* We do not have data structures that would allow us to
3852 * get the rs_pending_cnt down to 0 again.
3853 * * On C_SYNC_TARGET we do not have any data structures describing
3854 * the pending RSDataRequest's we have sent.
3855 * * On C_SYNC_SOURCE there is no data structure that tracks
3856 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3857 * And no, it is not the sum of the reference counts in the
3858 * resync_LRU. The resync_LRU tracks the whole operation including
3859 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3860 * on the fly. */
3861 drbd_rs_cancel_all(mdev);
3862 mdev->rs_total = 0;
3863 mdev->rs_failed = 0;
3864 atomic_set(&mdev->rs_pending_cnt, 0);
3865 wake_up(&mdev->misc_wait);
3866
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003867 del_timer(&mdev->request_timer);
3868
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869 /* make sure syncer is stopped and w_resume_next_sg queued */
3870 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871 resync_timer_fn((unsigned long)mdev);
3872
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3874 * w_make_resync_request etc. which may still be on the worker queue
3875 * to be "canceled" */
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003876 drbd_flush_workqueue(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877
3878 /* This also does reclaim_net_ee(). If we do this too early, we might
3879 * miss some resync ee and pages.*/
3880 drbd_process_done_ee(mdev);
3881
3882 kfree(mdev->p_uuid);
3883 mdev->p_uuid = NULL;
3884
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003885 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886 tl_clear(mdev);
3887
Philipp Reisnerb411b362009-09-25 16:07:19 -07003888 dev_info(DEV, "Connection closed\n");
3889
3890 drbd_md_sync(mdev);
3891
3892 fp = FP_DONT_CARE;
3893 if (get_ldev(mdev)) {
3894 fp = mdev->ldev->dc.fencing;
3895 put_ldev(mdev);
3896 }
3897
Philipp Reisner87f7be42010-06-11 13:56:33 +02003898 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3899 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003900
Philipp Reisner87eeee42011-01-19 14:16:30 +01003901 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902 os = mdev->state;
3903 if (os.conn >= C_UNCONNECTED) {
3904 /* Do not restart in case we are C_DISCONNECTING */
3905 ns = os;
3906 ns.conn = C_UNCONNECTED;
3907 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3908 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003909 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910
3911 if (os.conn == C_DISCONNECTING) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01003912 wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003913
Philipp Reisnera0638452011-01-19 14:31:32 +01003914 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
3915 mdev->tconn->cram_hmac_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003916
Philipp Reisner89e58e72011-01-19 13:12:45 +01003917 kfree(mdev->tconn->net_conf);
3918 mdev->tconn->net_conf = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3920 }
3921
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003922 /* serialize with bitmap writeout triggered by the state change,
3923 * if any. */
3924 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3925
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926 /* tcp_close and release of sendpage pages can be deferred. I don't
3927 * want to use SO_LINGER, because apparently it can be deferred for
3928 * more than 20 seconds (longest time I checked).
3929 *
3930 * Actually we don't care for exactly when the network stack does its
3931 * put_page(), but release our reference on these pages right here.
3932 */
3933 i = drbd_release_ee(mdev, &mdev->net_ee);
3934 if (i)
3935 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003936 i = atomic_read(&mdev->pp_in_use_by_net);
3937 if (i)
3938 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003939 i = atomic_read(&mdev->pp_in_use);
3940 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003941 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003942
3943 D_ASSERT(list_empty(&mdev->read_ee));
3944 D_ASSERT(list_empty(&mdev->active_ee));
3945 D_ASSERT(list_empty(&mdev->sync_ee));
3946 D_ASSERT(list_empty(&mdev->done_ee));
3947
3948 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3949 atomic_set(&mdev->current_epoch->epoch_size, 0);
3950 D_ASSERT(list_empty(&mdev->current_epoch->list));
3951}
3952
3953/*
3954 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3955 * we can agree on is stored in agreed_pro_version.
3956 *
3957 * feature flags and the reserved array should be enough room for future
3958 * enhancements of the handshake protocol, and possible plugins...
3959 *
3960 * for now, they are expected to be zero, but ignored.
3961 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003962static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003964 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003965 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003966 int ok;
3967
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003968 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3969 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970 return 0; /* interrupted. not ok. */
3971 }
3972
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003973 if (tconn->data.socket == NULL) {
3974 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003975 return 0;
3976 }
3977
3978 memset(p, 0, sizeof(*p));
3979 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3980 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003981 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3982 &p->head, sizeof(*p), 0);
3983 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003984 return ok;
3985}
3986
3987/*
3988 * return values:
3989 * 1 yes, we have a valid connection
3990 * 0 oops, did not work out, please try again
3991 * -1 peer talks different language,
3992 * no point in trying again, please go standalone.
3993 */
3994static int drbd_do_handshake(struct drbd_conf *mdev)
3995{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003996 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003997 struct p_handshake *p = &mdev->tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02003998 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01003999 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000 int rv;
4001
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004002 rv = drbd_send_handshake(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004003 if (!rv)
4004 return 0;
4005
Philipp Reisner77351055b2011-02-07 17:24:26 +01004006 rv = drbd_recv_header(mdev, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007 if (!rv)
4008 return 0;
4009
Philipp Reisner77351055b2011-02-07 17:24:26 +01004010 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004012 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 return -1;
4014 }
4015
Philipp Reisner77351055b2011-02-07 17:24:26 +01004016 if (pi.size != expect) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 dev_err(DEV, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004018 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004019 return -1;
4020 }
4021
Philipp Reisnerde0ff332011-02-07 16:56:20 +01004022 rv = drbd_recv(mdev->tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004023
4024 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004025 if (!signal_pending(current))
4026 dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027 return 0;
4028 }
4029
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 p->protocol_min = be32_to_cpu(p->protocol_min);
4031 p->protocol_max = be32_to_cpu(p->protocol_max);
4032 if (p->protocol_max == 0)
4033 p->protocol_max = p->protocol_min;
4034
4035 if (PRO_VERSION_MAX < p->protocol_min ||
4036 PRO_VERSION_MIN > p->protocol_max)
4037 goto incompat;
4038
Philipp Reisner31890f42011-01-19 14:12:51 +01004039 mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004040
4041 dev_info(DEV, "Handshake successful: "
Philipp Reisner31890f42011-01-19 14:12:51 +01004042 "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043
4044 return 1;
4045
4046 incompat:
4047 dev_err(DEV, "incompatible DRBD dialects: "
4048 "I support %d-%d, peer supports %d-%d\n",
4049 PRO_VERSION_MIN, PRO_VERSION_MAX,
4050 p->protocol_min, p->protocol_max);
4051 return -1;
4052}
4053
4054#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
4055static int drbd_do_auth(struct drbd_conf *mdev)
4056{
4057 dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4058 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004059 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060}
4061#else
4062#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004063
4064/* Return value:
4065 1 - auth succeeded,
4066 0 - failed, try again (network error),
4067 -1 - auth failed, don't try again.
4068*/
4069
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070static int drbd_do_auth(struct drbd_conf *mdev)
4071{
4072 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4073 struct scatterlist sg;
4074 char *response = NULL;
4075 char *right_response = NULL;
4076 char *peers_ch = NULL;
Philipp Reisner89e58e72011-01-19 13:12:45 +01004077 unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004078 unsigned int resp_size;
4079 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004080 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081 int rv;
4082
Philipp Reisnera0638452011-01-19 14:31:32 +01004083 desc.tfm = mdev->tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084 desc.flags = 0;
4085
Philipp Reisnera0638452011-01-19 14:31:32 +01004086 rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm,
Philipp Reisner89e58e72011-01-19 13:12:45 +01004087 (u8 *)mdev->tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004088 if (rv) {
4089 dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004090 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004091 goto fail;
4092 }
4093
4094 get_random_bytes(my_challenge, CHALLENGE_LEN);
4095
4096 rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
4097 if (!rv)
4098 goto fail;
4099
Philipp Reisner77351055b2011-02-07 17:24:26 +01004100 rv = drbd_recv_header(mdev, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101 if (!rv)
4102 goto fail;
4103
Philipp Reisner77351055b2011-02-07 17:24:26 +01004104 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004106 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107 rv = 0;
4108 goto fail;
4109 }
4110
Philipp Reisner77351055b2011-02-07 17:24:26 +01004111 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112 dev_err(DEV, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004113 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114 goto fail;
4115 }
4116
Philipp Reisner77351055b2011-02-07 17:24:26 +01004117 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 if (peers_ch == NULL) {
4119 dev_err(DEV, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004120 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121 goto fail;
4122 }
4123
Philipp Reisner77351055b2011-02-07 17:24:26 +01004124 rv = drbd_recv(mdev->tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004125
Philipp Reisner77351055b2011-02-07 17:24:26 +01004126 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004127 if (!signal_pending(current))
4128 dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 rv = 0;
4130 goto fail;
4131 }
4132
Philipp Reisnera0638452011-01-19 14:31:32 +01004133 resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004134 response = kmalloc(resp_size, GFP_NOIO);
4135 if (response == NULL) {
4136 dev_err(DEV, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004137 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004138 goto fail;
4139 }
4140
4141 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004142 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143
4144 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4145 if (rv) {
4146 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004147 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148 goto fail;
4149 }
4150
4151 rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
4152 if (!rv)
4153 goto fail;
4154
Philipp Reisner77351055b2011-02-07 17:24:26 +01004155 rv = drbd_recv_header(mdev, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 if (!rv)
4157 goto fail;
4158
Philipp Reisner77351055b2011-02-07 17:24:26 +01004159 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004161 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162 rv = 0;
4163 goto fail;
4164 }
4165
Philipp Reisner77351055b2011-02-07 17:24:26 +01004166 if (pi.size != resp_size) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167 dev_err(DEV, "expected AuthResponse payload of wrong size\n");
4168 rv = 0;
4169 goto fail;
4170 }
4171
Philipp Reisnerde0ff332011-02-07 16:56:20 +01004172 rv = drbd_recv(mdev->tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173
4174 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004175 if (!signal_pending(current))
4176 dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004177 rv = 0;
4178 goto fail;
4179 }
4180
4181 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004182 if (right_response == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183 dev_err(DEV, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004184 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185 goto fail;
4186 }
4187
4188 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4189
4190 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4191 if (rv) {
4192 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004193 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194 goto fail;
4195 }
4196
4197 rv = !memcmp(response, right_response, resp_size);
4198
4199 if (rv)
4200 dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
Philipp Reisner89e58e72011-01-19 13:12:45 +01004201 resp_size, mdev->tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004202 else
4203 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204
4205 fail:
4206 kfree(peers_ch);
4207 kfree(response);
4208 kfree(right_response);
4209
4210 return rv;
4211}
4212#endif
4213
4214int drbdd_init(struct drbd_thread *thi)
4215{
4216 struct drbd_conf *mdev = thi->mdev;
4217 unsigned int minor = mdev_to_minor(mdev);
4218 int h;
4219
4220 sprintf(current->comm, "drbd%d_receiver", minor);
4221
4222 dev_info(DEV, "receiver (re)started\n");
4223
4224 do {
4225 h = drbd_connect(mdev);
4226 if (h == 0) {
4227 drbd_disconnect(mdev);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004228 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229 }
4230 if (h == -1) {
4231 dev_warn(DEV, "Discarding network configuration.\n");
4232 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4233 }
4234 } while (h == 0);
4235
4236 if (h > 0) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004237 if (get_net_conf(mdev->tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004238 drbdd(mdev);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004239 put_net_conf(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240 }
4241 }
4242
4243 drbd_disconnect(mdev);
4244
4245 dev_info(DEV, "receiver terminated\n");
4246 return 0;
4247}
4248
4249/* ********* acknowledge sender ******** */
4250
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004251static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004253 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254
4255 int retcode = be32_to_cpu(p->retcode);
4256
4257 if (retcode >= SS_SUCCESS) {
4258 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4259 } else {
4260 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4261 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4262 drbd_set_st_err_str(retcode), retcode);
4263 }
4264 wake_up(&mdev->state_wait);
4265
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004266 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267}
4268
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004269static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270{
4271 return drbd_send_ping_ack(mdev);
4272
4273}
4274
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004275static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276{
4277 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004278 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004279 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4280 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004281
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004282 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283}
4284
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004285static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004286{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004287 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288 sector_t sector = be64_to_cpu(p->sector);
4289 int blksize = be32_to_cpu(p->blksize);
4290
Philipp Reisner31890f42011-01-19 14:12:51 +01004291 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292
4293 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4294
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004295 if (get_ldev(mdev)) {
4296 drbd_rs_complete_io(mdev, sector);
4297 drbd_set_in_sync(mdev, sector, blksize);
4298 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4299 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4300 put_ldev(mdev);
4301 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004303 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004305 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306}
4307
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004308static int
4309validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4310 struct rb_root *root, const char *func,
4311 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312{
4313 struct drbd_request *req;
4314 struct bio_and_error m;
4315
Philipp Reisner87eeee42011-01-19 14:16:30 +01004316 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004317 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004319 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004320 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004321 }
4322 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004323 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004324
4325 if (m.bio)
4326 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004327 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328}
4329
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004330static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004331{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004332 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333 sector_t sector = be64_to_cpu(p->sector);
4334 int blksize = be32_to_cpu(p->blksize);
4335 enum drbd_req_event what;
4336
4337 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4338
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004339 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340 drbd_set_in_sync(mdev, sector, blksize);
4341 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004342 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004344 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004346 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004347 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 break;
4349 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004350 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004351 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352 break;
4353 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004354 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004355 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 break;
4357 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004358 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004359 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 break;
4361 default:
4362 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004363 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 }
4365
4366 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004367 &mdev->write_requests, __func__,
4368 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004369}
4370
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004371static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004373 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004375 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004376 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4377 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004378 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004379
4380 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4381
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004382 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383 dec_rs_pending(mdev);
4384 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004385 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004386 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004387
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004388 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004389 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004390 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004391 if (!found) {
4392 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4393 The master bio might already be completed, therefore the
4394 request is no longer in the collision hash. */
4395 /* In Protocol B we might already have got a P_RECV_ACK
4396 but then get a P_NEG_ACK afterwards. */
4397 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004398 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004399 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004400 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004401 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402}
4403
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004404static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004405{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004406 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 sector_t sector = be64_to_cpu(p->sector);
4408
4409 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4410 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4411 (unsigned long long)sector, be32_to_cpu(p->blksize));
4412
4413 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004414 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004415 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004416}
4417
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004418static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419{
4420 sector_t sector;
4421 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004422 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423
4424 sector = be64_to_cpu(p->sector);
4425 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426
4427 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4428
4429 dec_rs_pending(mdev);
4430
4431 if (get_ldev_if_state(mdev, D_FAILED)) {
4432 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004433 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004434 case P_NEG_RS_DREPLY:
4435 drbd_rs_failed_io(mdev, sector, size);
4436 case P_RS_CANCEL:
4437 break;
4438 default:
4439 D_ASSERT(0);
4440 put_ldev(mdev);
4441 return false;
4442 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443 put_ldev(mdev);
4444 }
4445
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004446 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447}
4448
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004449static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004451 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452
4453 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4454
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004455 if (mdev->state.conn == C_AHEAD &&
4456 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004457 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4458 mdev->start_resync_timer.expires = jiffies + HZ;
4459 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004460 }
4461
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004462 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004463}
4464
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004465static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004466{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004467 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468 struct drbd_work *w;
4469 sector_t sector;
4470 int size;
4471
4472 sector = be64_to_cpu(p->sector);
4473 size = be32_to_cpu(p->blksize);
4474
4475 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4476
4477 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4478 drbd_ov_oos_found(mdev, sector, size);
4479 else
4480 ov_oos_print(mdev);
4481
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004482 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004483 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004484
Philipp Reisnerb411b362009-09-25 16:07:19 -07004485 drbd_rs_complete_io(mdev, sector);
4486 dec_rs_pending(mdev);
4487
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004488 --mdev->ov_left;
4489
4490 /* let's advance progress step marks only for every other megabyte */
4491 if ((mdev->ov_left & 0x200) == 0x200)
4492 drbd_advance_rs_marks(mdev, mdev->ov_left);
4493
4494 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004495 w = kmalloc(sizeof(*w), GFP_NOIO);
4496 if (w) {
4497 w->cb = w_ov_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004498 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499 } else {
4500 dev_err(DEV, "kmalloc(w) failed.");
4501 ov_oos_print(mdev);
4502 drbd_resync_finished(mdev);
4503 }
4504 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004505 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004506 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507}
4508
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004509static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004510{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004511 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004512}
4513
Philipp Reisnerb411b362009-09-25 16:07:19 -07004514struct asender_cmd {
4515 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004516 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004517};
4518
4519static struct asender_cmd *get_asender_cmd(int cmd)
4520{
4521 static struct asender_cmd asender_tbl[] = {
4522 /* anything missing from this table is in
4523 * the drbd_cmd_handler (drbd_default_handler) table,
4524 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004525 [P_PING] = { sizeof(struct p_header), got_Ping },
4526 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4528 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4529 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4530 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4531 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4532 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4533 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4534 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4535 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4536 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4537 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004538 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004539 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540 [P_MAX_CMD] = { 0, NULL },
4541 };
4542 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4543 return NULL;
4544 return &asender_tbl[cmd];
4545}
4546
4547int drbd_asender(struct drbd_thread *thi)
4548{
4549 struct drbd_conf *mdev = thi->mdev;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004550 struct p_header *h = &mdev->tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004551 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004552 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004553
Philipp Reisner257d0af2011-01-26 12:15:29 +01004554 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 void *buf = h;
4556 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004557 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004558 int ping_timeout_active = 0;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004559 int empty;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004560
4561 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4562
4563 current->policy = SCHED_RR; /* Make this a realtime task! */
4564 current->rt_priority = 2; /* more important than all other tasks */
4565
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004566 while (get_t_state(thi) == RUNNING) {
Philipp Reisnerbc31fe32011-02-07 11:14:38 +01004567 drbd_thread_current_set_cpu(mdev, thi);
Philipp Reisnere43ef192011-02-07 14:40:40 +01004568 if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) {
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004569 if (!drbd_send_ping(mdev)) {
4570 dev_err(DEV, "drbd_send_ping has failed\n");
4571 goto reconnect;
4572 }
Philipp Reisnere42325a2011-01-19 13:55:45 +01004573 mdev->tconn->meta.socket->sk->sk_rcvtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +01004574 mdev->tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004575 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004576 }
4577
4578 /* conditionally cork;
4579 * it may hurt latency if we cork without much to send */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004580 if (!mdev->tconn->net_conf->no_cork &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004581 3 < atomic_read(&mdev->unacked_cnt))
Philipp Reisnere42325a2011-01-19 13:55:45 +01004582 drbd_tcp_cork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583 while (1) {
Philipp Reisner808e37b2011-02-07 14:44:14 +01004584 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585 flush_signals(current);
Lars Ellenberg0f8488e2010-10-13 18:19:23 +02004586 if (!drbd_process_done_ee(mdev))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004588 /* to avoid race with newly queued ACKs */
Philipp Reisner808e37b2011-02-07 14:44:14 +01004589 set_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004590 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004591 empty = list_empty(&mdev->done_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004592 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004593 /* new ack may have been queued right here,
4594 * but then there is also a signal pending,
4595 * and we start over... */
4596 if (empty)
4597 break;
4598 }
4599 /* but unconditionally uncork unless disabled */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004600 if (!mdev->tconn->net_conf->no_cork)
Philipp Reisnere42325a2011-01-19 13:55:45 +01004601 drbd_tcp_uncork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602
4603 /* short circuit, recv_msg would return EINTR anyways. */
4604 if (signal_pending(current))
4605 continue;
4606
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +01004607 rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0);
Philipp Reisner808e37b2011-02-07 14:44:14 +01004608 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004609
4610 flush_signals(current);
4611
4612 /* Note:
4613 * -EINTR (on meta) we got a signal
4614 * -EAGAIN (on meta) rcvtimeo expired
4615 * -ECONNRESET other side closed the connection
4616 * -ERESTARTSYS (on data) we got a signal
4617 * rv < 0 other than above: unexpected error!
4618 * rv == expected: full header or command
4619 * rv < expected: "woken" by signal during receive
4620 * rv == 0 : "connection shut down by peer"
4621 */
4622 if (likely(rv > 0)) {
4623 received += rv;
4624 buf += rv;
4625 } else if (rv == 0) {
4626 dev_err(DEV, "meta connection shut down by peer.\n");
4627 goto reconnect;
4628 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004629 /* If the data socket received something meanwhile,
4630 * that is good enough: peer is still alive. */
Philipp Reisner31890f42011-01-19 14:12:51 +01004631 if (time_after(mdev->tconn->last_received,
Philipp Reisnere42325a2011-01-19 13:55:45 +01004632 jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004633 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004634 if (ping_timeout_active) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635 dev_err(DEV, "PingAck did not arrive in time.\n");
4636 goto reconnect;
4637 }
Philipp Reisnere43ef192011-02-07 14:40:40 +01004638 set_bit(SEND_PING, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004639 continue;
4640 } else if (rv == -EINTR) {
4641 continue;
4642 } else {
4643 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
4644 goto reconnect;
4645 }
4646
4647 if (received == expect && cmd == NULL) {
Philipp Reisnerce243852011-02-07 17:27:47 +01004648 if (!decode_header(mdev->tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004650 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004651 if (unlikely(cmd == NULL)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004652 dev_err(DEV, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004653 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654 goto disconnect;
4655 }
4656 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004657 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004658 dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004659 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004661 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 }
4663 if (received == expect) {
Philipp Reisner31890f42011-01-19 14:12:51 +01004664 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004665 D_ASSERT(cmd != NULL);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004666 if (!cmd->process(mdev, pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 goto reconnect;
4668
Lars Ellenbergf36af182011-03-09 22:44:55 +01004669 /* the idle_timeout (ping-int)
4670 * has been restored in got_PingAck() */
4671 if (cmd == get_asender_cmd(P_PING_ACK))
4672 ping_timeout_active = 0;
4673
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674 buf = h;
4675 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004676 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004677 cmd = NULL;
4678 }
4679 }
4680
4681 if (0) {
4682reconnect:
4683 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004684 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685 }
4686 if (0) {
4687disconnect:
4688 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004689 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004690 }
Philipp Reisner808e37b2011-02-07 14:44:14 +01004691 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692
4693 D_ASSERT(mdev->state.conn < C_CONNECTED);
4694 dev_info(DEV, "asender terminated\n");
4695
4696 return 0;
4697}