/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

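/* Note: GFP_TRY is a deliberately "best effort" mask: it omits
 * __GFP_WAIT/__GFP_IO/__GFP_FS, so the allocation can neither sleep nor
 * trigger write-out (see the "criss-cross" comments below), and __GFP_NOWARN
 * keeps the expected failures quiet; callers fall back to the retry logic in
 * drbd_pp_alloc() instead. */
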
/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

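/* Taken together: drbd_pp_alloc() accounts the pages it hands out in
 * mdev->pp_in_use, while drbd_pp_free() decrements pp_in_use (or
 * pp_in_use_by_net) and wakes drbd_pp_wait, so an allocator sleeping in
 * drbd_pp_alloc() makes progress as soon as another context returns pages. */
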
/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}


/*
 * This function is called from _asender only_
 * but see also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}

void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops  = sock->ops;

out:
	return err;
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE));

	return rv;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 *  for the outgoing connections.
	 *  This is needed for multihomed hosts and to be
	 *  able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(tconn);
	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
				  (struct sockaddr *) tconn->net_conf->my_addr,
				  tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
		}
	}
	put_net_conf(tconn);

	return s_estab;
}

static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}

static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}

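/* The "first packet" sent with drbd_send_fp() above is just a bare header;
 * drbd_connect() below uses the command carried in it (P_HAND_SHAKE_S vs.
 * P_HAND_SHAKE_M) to decide whether an incoming connection is meant to become
 * the data socket or the meta (ack) socket. */
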
/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);

	return !ok;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->volume0->state.conn <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}

static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		pi->cmd = be16_to_cpu(h->h80.command);
		pi->size = be16_to_cpu(h->h80.length);
		pi->vnr = 0;
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		pi->cmd = be16_to_cpu(h->h95.command);
		pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
		pi->vnr = 0;
	} else {
		conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
		    be32_to_cpu(h->h80.magic),
		    be16_to_cpu(h->h80.command),
		    be16_to_cpu(h->h80.length));
		return false;
	}
	return true;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_header *h = &tconn->data.rbuf.header;
	int r;

	r = drbd_recv(tconn, h, sizeof(*h));
	if (unlikely(r != sizeof(*h))) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
		return false;
	}

	r = decode_header(tconn, h, pi);
	tconn->last_received = jiffies;

	return r;
}

static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

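/* Epoch bookkeeping: writes received between two P_BARRIER packets belong to
 * one drbd_epoch.  The barrier ack (drbd_send_b_ack()) is only sent once the
 * epoch carries its barrier number, has a non-zero size, and no longer has
 * active writes -- exactly the conditions drbd_may_finish_epoch() checks. */
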
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}

/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}

/**
 * drbd_submit_ee()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		   const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_endio_sec;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&mdev->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete.  */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}

1267/* used from receive_RSDataReply (recv_resync_read)
1268 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001269static struct drbd_peer_request *
1270read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1271 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272{
Lars Ellenberg66660322010-04-06 12:15:04 +02001273 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001274 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001276 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001277 void *dig_in = mdev->tconn->int_dig_in;
1278 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001279 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280
Philipp Reisnera0638452011-01-19 14:31:32 +01001281 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1282 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001283
1284 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001285 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001286 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001287 if (!signal_pending(current))
1288 dev_warn(DEV,
1289 "short read receiving data digest: read %d expected %d\n",
1290 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291 return NULL;
1292 }
1293 }
1294
1295 data_size -= dgs;
1296
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001297 if (!expect(data_size != 0))
1298 return NULL;
1299 if (!expect(IS_ALIGNED(data_size, 512)))
1300 return NULL;
1301 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1302 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001303
Lars Ellenberg66660322010-04-06 12:15:04 +02001304 /* even though we trust out peer,
1305 * we sometimes have to double check. */
1306 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001307 dev_err(DEV, "request from peer beyond end of local disk: "
1308 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001309 (unsigned long long)capacity,
1310 (unsigned long long)sector, data_size);
1311 return NULL;
1312 }
1313
Philipp Reisnerb411b362009-09-25 16:07:19 -07001314 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1315 * "criss-cross" setup, that might cause write-out on some other DRBD,
1316 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001317 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1318 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001319 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001320
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001322 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001323 page_chain_for_each(page) {
1324 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001325 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001326 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001327 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001328 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1329 data[0] = data[0] ^ (unsigned long)-1;
1330 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001331 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001332 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001333 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001334 if (!signal_pending(current))
1335 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1336 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001337 return NULL;
1338 }
1339 ds -= rr;
1340 }
1341
1342 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001343 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001344 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001345 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1346 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001347 drbd_bcast_ee(mdev, "digest failed",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001348 dgs, dig_in, dig_vv, peer_req);
1349 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001350 return NULL;
1351 }
1352 }
1353 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001354 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355}
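/*
 * Editorial sketch, not part of drbd: read_in_block() above follows a
 * "receive digest, receive payload, recompute, compare" pattern, with
 * drbd_csum_ee() doing the recomputation over the peer request's pages.
 * The self-contained fragment below shows only the verification step,
 * with a trivial additive checksum standing in for the configured
 * integrity hash; it is an illustration, not drbd code.
 */
static u32 sketch_sum32(const unsigned char *buf, size_t len)
{
	u32 sum = 0;

	while (len--)
		sum += *buf++;
	return sum;
}

static bool sketch_payload_ok(const unsigned char *payload, size_t len, u32 wire_sum)
{
	/* a mismatch here corresponds to the "Digest integrity check FAILED" path above */
	return sketch_sum32(payload, len) == wire_sum;
}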
1356
1357/* drbd_drain_block() just takes a data block
1358 * out of the socket input buffer, and discards it.
1359 */
1360static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1361{
1362 struct page *page;
1363 int rr, rv = 1;
1364 void *data;
1365
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001366 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001367 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001368
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001369 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001370
1371 data = kmap(page);
1372 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001373 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001374 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1375 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001376 if (!signal_pending(current))
1377 dev_warn(DEV,
1378 "short read receiving data: read %d expected %d\n",
1379 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380 break;
1381 }
1382 data_size -= rr;
1383 }
1384 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001385 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001386 return rv;
1387}
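/*
 * Editorial sketch, not part of drbd: drbd_drain_block() above discards a
 * payload by repeatedly receiving into a single scratch page.  The loop
 * below shows the same chunked-drain pattern in isolation; recv_some() is
 * a hypothetical stand-in for drbd_recv(), PAGE_SIZE and min_t() are the
 * usual kernel definitions.
 */
static bool sketch_drain(void *scratch_page, int bytes_left)
{
	int rr;

	while (bytes_left) {
		rr = recv_some(scratch_page, min_t(int, bytes_left, PAGE_SIZE));
		if (rr != min_t(int, bytes_left, PAGE_SIZE))
			return false;	/* short read: the connection is going away */
		bytes_left -= rr;
	}
	return true;
}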
1388
1389static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1390 sector_t sector, int data_size)
1391{
1392 struct bio_vec *bvec;
1393 struct bio *bio;
1394 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001395 void *dig_in = mdev->tconn->int_dig_in;
1396 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001397
Philipp Reisnera0638452011-01-19 14:31:32 +01001398 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1399 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001400
1401 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001402 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001404 if (!signal_pending(current))
1405 dev_warn(DEV,
1406 "short read receiving data reply digest: read %d expected %d\n",
1407 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408 return 0;
1409 }
1410 }
1411
1412 data_size -= dgs;
1413
1414 /* optimistically update recv_cnt. if receiving fails below,
1415 * we disconnect anyways, and counters will be reset. */
1416 mdev->recv_cnt += data_size>>9;
1417
1418 bio = req->master_bio;
1419 D_ASSERT(sector == bio->bi_sector);
1420
1421 bio_for_each_segment(bvec, bio, i) {
1422 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001423 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001424 kmap(bvec->bv_page)+bvec->bv_offset,
1425 expect);
1426 kunmap(bvec->bv_page);
1427 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001428 if (!signal_pending(current))
1429 dev_warn(DEV, "short read receiving data reply: "
1430 "read %d expected %d\n",
1431 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001432 return 0;
1433 }
1434 data_size -= rr;
1435 }
1436
1437 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001438 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001439 if (memcmp(dig_in, dig_vv, dgs)) {
1440 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1441 return 0;
1442 }
1443 }
1444
1445 D_ASSERT(data_size == 0);
1446 return 1;
1447}
1448
1449/* e_end_resync_block() is called via
1450 * drbd_process_done_ee() by asender only */
1451static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1452{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001453 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1454 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455 int ok;
1456
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001457 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001459 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1460 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1461 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 } else {
1463 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001464 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001466 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467 }
1468 dec_unacked(mdev);
1469
1470 return ok;
1471}
1472
1473static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1474{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001475 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001477 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1478 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001479 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480
1481 dec_rs_pending(mdev);
1482
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483 inc_unacked(mdev);
1484 /* corresponding dec_unacked() in e_end_resync_block()
1485 * respective _drbd_clear_done_ee */
1486
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001487 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001488
Philipp Reisner87eeee42011-01-19 14:16:30 +01001489 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001490 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001491 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001492
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001493 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001494 if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001495 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001496
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001497 /* don't care for the reason here */
1498 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001499 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001500 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001501 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001502
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001503 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001504fail:
1505 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001506 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507}
1508
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001509static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001510find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1511 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001512{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001513 struct drbd_request *req;
1514
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001515 /* Request object according to our peer */
1516 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001517 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001518 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001519 if (!missing_ok) {
1520 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1521 (unsigned long)id, (unsigned long long)sector);
1522 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001523 return NULL;
1524}
1525
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001526static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1527 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528{
1529 struct drbd_request *req;
1530 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001532 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533
1534 sector = be64_to_cpu(p->sector);
1535
Philipp Reisner87eeee42011-01-19 14:16:30 +01001536 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001537 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001538 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001539 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001540 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541
Bart Van Assche24c48302011-05-21 18:32:29 +02001542 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543 * special casing it there for the various failure cases.
1544 * still no race with drbd_fail_pending_reads */
1545 ok = recv_dless_read(mdev, req, sector, data_size);
1546
1547 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001548 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 /* else: nothing. handled from drbd_disconnect...
1550 * I don't think we may complete this just yet
1551 * in case we are "on-disconnect: freeze" */
1552
1553 return ok;
1554}
1555
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001556static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1557 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558{
1559 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001561 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562
1563 sector = be64_to_cpu(p->sector);
1564 D_ASSERT(p->block_id == ID_SYNCER);
1565
1566 if (get_ldev(mdev)) {
1567 /* data is submitted to disk within recv_resync_read.
1568 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001569 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570 ok = recv_resync_read(mdev, sector, data_size);
1571 } else {
1572 if (__ratelimit(&drbd_ratelimit_state))
1573 dev_err(DEV, "Can not write resync data to local disk.\n");
1574
1575 ok = drbd_drain_block(mdev, data_size);
1576
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001577 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578 }
1579
Philipp Reisner778f2712010-07-06 11:14:00 +02001580 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1581
Philipp Reisnerb411b362009-09-25 16:07:19 -07001582 return ok;
1583}
1584
1585/* e_end_block() is called via drbd_process_done_ee().
1586 * this means this function only runs in the asender thread
1587 */
1588static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1589{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001590 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1591 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001592 int ok = 1, pcmd;
1593
Philipp Reisner89e58e72011-01-19 13:12:45 +01001594 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001595 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1597 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001598 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001599 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001600 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001602 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001604 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 /* we expect it to be marked out of sync anyways...
1606 * maybe assert this? */
1607 }
1608 dec_unacked(mdev);
1609 }
1610 /* we delete from the conflict detection hash _after_ we sent out the
1611 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001612 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001613 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001614 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1615 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001616 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001617 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001618 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001620 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621
1622 return ok;
1623}
1624
1625static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1626{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001627 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 int ok = 1;
1629
Philipp Reisner89e58e72011-01-19 13:12:45 +01001630 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001631 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632
Philipp Reisner87eeee42011-01-19 14:16:30 +01001633 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001634 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1635 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001636 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637
1638 dec_unacked(mdev);
1639
1640 return ok;
1641}
1642
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001643static bool seq_greater(u32 a, u32 b)
1644{
1645 /*
1646 * We assume 32-bit wrap-around here.
1647 * For 24-bit wrap-around, we would have to shift:
1648 * a <<= 8; b <<= 8;
1649 */
1650 return (s32)a - (s32)b > 0;
1651}
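/*
 * Editorial note: the signed subtraction above is the classic serial
 * number comparison.  Worked example for the 32-bit wrap-around case:
 *   seq_greater(0x00000002, 0xfffffffd)
 *     = ((s32)0x00000002 - (s32)0xfffffffd > 0)
 *     = (2 - (-3) > 0) = true,
 * so a sequence number that has just wrapped is still seen as newer than
 * one from shortly before the wrap, as long as the two numbers differ by
 * less than 2^31.
 */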
1652
1653static u32 seq_max(u32 a, u32 b)
1654{
1655 return seq_greater(a, b) ? a : b;
1656}
1657
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001658static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001659{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001660 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001661
1662 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001663 old_peer_seq = mdev->peer_seq;
1664 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001665 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001666 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001667 wake_up(&mdev->seq_wait);
1668}
1669
Philipp Reisnerb411b362009-09-25 16:07:19 -07001670/* Called from receive_Data.
1671 * Synchronize packets on sock with packets on msock.
1672 *
1673 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1674 * packet traveling on msock, they are still processed in the order they have
1675 * been sent.
1676 *
1677 * Note: we don't care for Ack packets overtaking P_DATA packets.
1678 *
1679 * In case packet_seq is larger than mdev->peer_seq number, there are
1680 * outstanding packets on the msock. We wait for them to arrive.
1681 * In case we are the logically next packet, we update mdev->peer_seq
1682 * ourselves. Correctly handles 32bit wrap around.
1683 *
1684 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1685 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1686 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1687 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1688 *
1689 * returns 0 if we may process the packet,
1690 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1691static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1692{
1693 DEFINE_WAIT(wait);
1694 unsigned int p_seq;
1695 long timeout;
1696 int ret = 0;
1697 spin_lock(&mdev->peer_seq_lock);
1698 for (;;) {
1699 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001700 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701 break;
1702 if (signal_pending(current)) {
1703 ret = -ERESTARTSYS;
1704 break;
1705 }
1706 p_seq = mdev->peer_seq;
1707 spin_unlock(&mdev->peer_seq_lock);
1708 timeout = schedule_timeout(30*HZ);
1709 spin_lock(&mdev->peer_seq_lock);
1710 if (timeout == 0 && p_seq == mdev->peer_seq) {
1711 ret = -ETIMEDOUT;
1712 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1713 break;
1714 }
1715 }
1716 finish_wait(&mdev->seq_wait, &wait);
1717 if (mdev->peer_seq+1 == packet_seq)
1718 mdev->peer_seq++;
1719 spin_unlock(&mdev->peer_seq_lock);
1720 return ret;
1721}
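/*
 * Editorial note, a concrete walk-through of the wait rule above
 * (sequence numbers invented for illustration):
 *   - peer_seq == 100, packet_seq == 101: this is the logically next
 *     packet, the loop exits at once and peer_seq is bumped to 101.
 *   - peer_seq == 100, packet_seq == 105: packets 101..104 are still in
 *     flight on the msock, so we sleep until update_peer_seq() has moved
 *     peer_seq to at least 104, or give up with -ETIMEDOUT after the
 *     30 second timeout and force a reconnect.
 */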
1722
Lars Ellenberg688593c2010-11-17 22:25:03 +01001723/* see also bio_flags_to_wire()
1724 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1725 * flags and back. We may replicate to other kernel versions. */
1726static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001727{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001728 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1729 (dpf & DP_FUA ? REQ_FUA : 0) |
1730 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1731 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001732}
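/*
 * Editorial sketch: bio_flags_to_wire() lives in drbd_req.c and is not
 * shown here; presumably it is the mirror image of the mapping above.
 * A minimal sketch of such an inverse, using only the flag pairs that
 * appear in wire_flags_to_bio() and ignoring any protocol-version gating
 * the real helper may apply:
 */
static u32 sketch_bio_flags_to_wire(unsigned long bi_rw)
{
	return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
	       (bi_rw & REQ_FUA ? DP_FUA : 0) |
	       (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
	       (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
}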
1733
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001735static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1736 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001737{
1738 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001739 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001740 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741 int rw = WRITE;
1742 u32 dp_flags;
1743
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745 spin_lock(&mdev->peer_seq_lock);
1746 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1747 mdev->peer_seq++;
1748 spin_unlock(&mdev->peer_seq_lock);
1749
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001750 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751 atomic_inc(&mdev->current_epoch->epoch_size);
1752 return drbd_drain_block(mdev, data_size);
1753 }
1754
1755 /* get_ldev(mdev) successful.
1756 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001757 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758 * the end of this function. */
1759
1760 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001761 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1762 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001764 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001765 }
1766
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001767 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768
Lars Ellenberg688593c2010-11-17 22:25:03 +01001769 dp_flags = be32_to_cpu(p->dp_flags);
1770 rw |= wire_flags_to_bio(mdev, dp_flags);
1771
1772 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001773 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001774
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001776 peer_req->epoch = mdev->current_epoch;
1777 atomic_inc(&peer_req->epoch->epoch_size);
1778 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779 spin_unlock(&mdev->epoch_lock);
1780
Philipp Reisnerb411b362009-09-25 16:07:19 -07001781 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001782 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001783 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 } else {
1785 /* don't get the req_lock yet,
1786 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001787 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001788 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790 int first;
1791
Philipp Reisner89e58e72011-01-19 13:12:45 +01001792 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
1794 /* conflict detection and handling:
1795 * 1. wait on the sequence number,
1796 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001797 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001798 *
1799 * Note: for two_primaries, we are protocol C,
1800 * so there cannot be any request that is DONE
1801 * but still on the transfer log.
1802 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803 * if no conflicting request is found:
1804 * submit.
1805 *
1806 * if any conflicting request is found
1807 * that has not yet been acked,
1808 * AND I have the "discard concurrent writes" flag:
1809 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1810 *
1811 * if any conflicting request is found:
1812 * block the receiver, waiting on misc_wait
1813 * until no more conflicting requests are there,
1814 * or we get interrupted (disconnect).
1815 *
1816 * we do not just write after local io completion of those
1817 * requests, but only after req is done completely, i.e.
1818 * we wait for the P_DISCARD_ACK to arrive!
1819 *
1820 * then proceed normally, i.e. submit.
1821 */
1822 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1823 goto out_interrupted;
1824
Philipp Reisner87eeee42011-01-19 14:16:30 +01001825 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827 first = 1;
1828 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001829 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830 int have_unacked = 0;
1831 int have_conflict = 0;
1832 prepare_to_wait(&mdev->misc_wait, &wait,
1833 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001834
1835 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1836 if (i) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001837 /* only ALERT on first iteration,
1838 * we may be woken up early... */
1839 if (first)
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001840 dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001841 " new: %llus +%u; pending: %llus +%u\n",
1842 current->comm, current->pid,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001843 i->local ? "local" : "remote",
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001844 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001845 (unsigned long long)i->sector, i->size);
1846
1847 if (i->local) {
1848 struct drbd_request *req2;
1849
1850 req2 = container_of(i, struct drbd_request, i);
1851 if (req2->rq_state & RQ_NET_PENDING)
1852 ++have_unacked;
1853 }
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001854 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001856 if (!have_conflict)
1857 break;
1858
1859 /* Discard Ack only for the _first_ iteration */
1860 if (first && discard && have_unacked) {
1861 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1862 (unsigned long long)sector);
1863 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001864 peer_req->w.cb = e_send_discard_ack;
1865 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001866
Philipp Reisner87eeee42011-01-19 14:16:30 +01001867 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868
1869 /* we could probably send that P_DISCARD_ACK ourselves,
1870 * but I don't like the receiver using the msock */
1871
1872 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001873 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001874 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001875 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001876 }
1877
1878 if (signal_pending(current)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001879 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880 finish_wait(&mdev->misc_wait, &wait);
1881 goto out_interrupted;
1882 }
1883
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001884 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001885 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001886
Philipp Reisner87eeee42011-01-19 14:16:30 +01001887 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888 if (first) {
1889 first = 0;
1890 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1891 "sec=%llus\n", (unsigned long long)sector);
1892 } else if (discard) {
1893 /* we had none on the first iteration.
1894 * there must be none now. */
1895 D_ASSERT(have_unacked == 0);
1896 }
1897 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001898 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001899 }
1900 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001901
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001902 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001903 }
1904
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001905 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001906 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001907
Philipp Reisner89e58e72011-01-19 13:12:45 +01001908 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909 case DRBD_PROT_C:
1910 inc_unacked(mdev);
1911 /* corresponding dec_unacked() in e_end_block()
1912 * respective _drbd_clear_done_ee */
1913 break;
1914 case DRBD_PROT_B:
1915 /* I really don't like it that the receiver thread
1916 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001917 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001918 break;
1919 case DRBD_PROT_A:
1920 /* nothing to do */
1921 break;
1922 }
1923
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001924 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001925 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001926 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1927 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1928 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1929 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001930 }
1931
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001932 if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001933 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001935 /* don't care for the reason here */
1936 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001937 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001938 list_del(&peer_req->w.list);
1939 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001940 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001941 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1942 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001943
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001945 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001946 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001947 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001948 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001949}
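/*
 * Editorial sketch, not part of drbd: the conflict detection above reduces
 * to an interval-overlap test between the incoming write
 * [sector, sector + size) and each request tracked in the write_requests
 * tree (drbd_find_overlap() walks that tree).  The predicate itself is the
 * standard half-open interval test; sizes are taken in bytes here,
 * matching how peer_req->i.size is used above, and sectors are 512 bytes.
 */
static bool sketch_write_conflict(sector_t a_sector, unsigned int a_size,
				  sector_t b_sector, unsigned int b_size)
{
	sector_t a_end = a_sector + (a_size >> 9);
	sector_t b_end = b_sector + (b_size >> 9);

	return a_sector < b_end && b_sector < a_end;
}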
1950
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001951/* We may throttle resync, if the lower device seems to be busy,
1952 * and current sync rate is above c_min_rate.
1953 *
1954 * To decide whether or not the lower device is busy, we use a scheme similar
1955 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1956 * (more than 64 sectors) of activity we cannot account for with our own resync
1957 * activity, it obviously is "busy".
1958 *
1959 * The current sync rate used here uses only the most recent two step marks,
1960 * to have a short time average so we can react faster.
1961 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001962int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001963{
1964 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1965 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001966 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001967 int curr_events;
1968 int throttle = 0;
1969
1970 /* feature disabled? */
1971 if (mdev->sync_conf.c_min_rate == 0)
1972 return 0;
1973
Philipp Reisnere3555d82010-11-07 15:56:29 +01001974 spin_lock_irq(&mdev->al_lock);
1975 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1976 if (tmp) {
1977 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1978 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1979 spin_unlock_irq(&mdev->al_lock);
1980 return 0;
1981 }
1982 /* Do not slow down if app IO is already waiting for this extent */
1983 }
1984 spin_unlock_irq(&mdev->al_lock);
1985
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001986 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1987 (int)part_stat_read(&disk->part0, sectors[1]) -
1988 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001989
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001990 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1991 unsigned long rs_left;
1992 int i;
1993
1994 mdev->rs_last_events = curr_events;
1995
1996 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1997 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001998 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1999
2000 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2001 rs_left = mdev->ov_left;
2002 else
2003 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002004
2005 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2006 if (!dt)
2007 dt++;
2008 db = mdev->rs_mark_left[i] - rs_left;
2009 dbdt = Bit2KB(db/dt);
2010
2011 if (dbdt > mdev->sync_conf.c_min_rate)
2012 throttle = 1;
2013 }
2014 return throttle;
2015}
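/*
 * Editorial note, a worked example of the rate estimate above (numbers
 * invented for illustration, assuming the default 4 KiB bitmap
 * granularity): if the selected sync mark is 6 seconds old (dt = 6) and
 * 3072 bitmap bits were cleared since that mark (db = 3072), the
 * short-term rate is dbdt = Bit2KB(3072 / 6) = Bit2KB(512) = 2048 KiB/s;
 * with c_min_rate configured at 1000 KiB/s this exceeds the minimum and
 * the resync request is throttled.
 */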
2016
2017
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002018static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2019 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002020{
2021 sector_t sector;
2022 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002023 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002025 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002027 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028
2029 sector = be64_to_cpu(p->sector);
2030 size = be32_to_cpu(p->blksize);
2031
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002032 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002033 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2034 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002035 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002036 }
2037 if (sector + (size>>9) > capacity) {
2038 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2039 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002040 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041 }
2042
2043 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002044 verb = 1;
2045 switch (cmd) {
2046 case P_DATA_REQUEST:
2047 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2048 break;
2049 case P_RS_DATA_REQUEST:
2050 case P_CSUM_RS_REQUEST:
2051 case P_OV_REQUEST:
2052 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2053 break;
2054 case P_OV_REPLY:
2055 verb = 0;
2056 dec_rs_pending(mdev);
2057 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2058 break;
2059 default:
2060 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2061 cmdname(cmd));
2062 }
2063 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002064 dev_err(DEV, "Can not satisfy peer's read request, "
2065 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002066
Lars Ellenberga821cc42010-09-06 12:31:37 +02002067 /* drain possibly payload */
2068 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002069 }
2070
2071 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2072 * "criss-cross" setup, that might cause write-out on some other DRBD,
2073 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002074 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2075 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002076 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002077 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 }
2079
Philipp Reisner02918be2010-08-20 14:35:10 +02002080 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002081 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002082 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002083 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002084 /* application IO, don't drbd_rs_begin_io */
2085 goto submit;
2086
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002088 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002089 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002090 /* used in the sector offset progress display */
2091 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002092 break;
2093
2094 case P_OV_REPLY:
2095 case P_CSUM_RS_REQUEST:
2096 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002097 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2098 if (!di)
2099 goto out_free_e;
2100
2101 di->digest_size = digest_size;
2102 di->digest = (((char *)di)+sizeof(struct digest_info));
2103
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002104 peer_req->digest = di;
2105 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002106
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002107 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002108 goto out_free_e;
2109
Philipp Reisner02918be2010-08-20 14:35:10 +02002110 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002111 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002112 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002113 /* used in the sector offset progress display */
2114 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002115 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002116 /* track progress, we may need to throttle */
2117 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002118 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002119 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002120 /* drbd_rs_begin_io done when we sent this request,
2121 * but accounting still needs to be done. */
2122 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002123 }
2124 break;
2125
2126 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002127 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002128 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002129 unsigned long now = jiffies;
2130 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002131 mdev->ov_start_sector = sector;
2132 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002133 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2134 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002135 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2136 mdev->rs_mark_left[i] = mdev->ov_left;
2137 mdev->rs_mark_time[i] = now;
2138 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002139 dev_info(DEV, "Online Verify start sector: %llu\n",
2140 (unsigned long long)sector);
2141 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002142 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002144 break;
2145
Philipp Reisnerb411b362009-09-25 16:07:19 -07002146 default:
2147 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002148 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002149 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002150 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151 }
2152
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002153 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2154 * wrt the receiver, but it is not as straightforward as it may seem.
2155 * Various places in the resync start and stop logic assume resync
2156 * requests are processed in order, requeuing this on the worker thread
2157 * introduces a bunch of new code for synchronization between threads.
2158 *
2159 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2160 * "forever", throttling after drbd_rs_begin_io will lock that extent
2161 * for application writes for the same time. For now, just throttle
2162 * here, where the rest of the code expects the receiver to sleep for
2163 * a while, anyways.
2164 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002165
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002166 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2167 * this defers syncer requests for some time, before letting at least
 2168	 * one request through. The resync controller on the receiving side
2169 * will adapt to the incoming rate accordingly.
2170 *
2171 * We cannot throttle here if remote is Primary/SyncTarget:
2172 * we would also throttle its application reads.
2173 * In that case, throttling is done on the SyncTarget only.
2174 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002175 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2176 schedule_timeout_uninterruptible(HZ/10);
2177 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002178 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002179
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002180submit_for_resync:
2181 atomic_add(size >> 9, &mdev->rs_sect_ev);
2182
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002183submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002184 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002185 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002186 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002187 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002188
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002189 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002190 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002191
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002192 /* don't care for the reason here */
2193 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002194 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002195 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002196 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002197 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2198
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002201 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002202 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002203}
2204
2205static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2206{
2207 int self, peer, rv = -100;
2208 unsigned long ch_self, ch_peer;
2209
2210 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2211 peer = mdev->p_uuid[UI_BITMAP] & 1;
2212
2213 ch_peer = mdev->p_uuid[UI_SIZE];
2214 ch_self = mdev->comm_bm_set;
2215
Philipp Reisner89e58e72011-01-19 13:12:45 +01002216 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002217 case ASB_CONSENSUS:
2218 case ASB_DISCARD_SECONDARY:
2219 case ASB_CALL_HELPER:
2220 dev_err(DEV, "Configuration error.\n");
2221 break;
2222 case ASB_DISCONNECT:
2223 break;
2224 case ASB_DISCARD_YOUNGER_PRI:
2225 if (self == 0 && peer == 1) {
2226 rv = -1;
2227 break;
2228 }
2229 if (self == 1 && peer == 0) {
2230 rv = 1;
2231 break;
2232 }
2233 /* Else fall through to one of the other strategies... */
2234 case ASB_DISCARD_OLDER_PRI:
2235 if (self == 0 && peer == 1) {
2236 rv = 1;
2237 break;
2238 }
2239 if (self == 1 && peer == 0) {
2240 rv = -1;
2241 break;
2242 }
2243 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002244 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002245 "Using discard-least-changes instead\n");
2246 case ASB_DISCARD_ZERO_CHG:
2247 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002248 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002249 ? -1 : 1;
2250 break;
2251 } else {
2252 if (ch_peer == 0) { rv = 1; break; }
2253 if (ch_self == 0) { rv = -1; break; }
2254 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002255 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002256 break;
2257 case ASB_DISCARD_LEAST_CHG:
2258 if (ch_self < ch_peer)
2259 rv = -1;
2260 else if (ch_self > ch_peer)
2261 rv = 1;
2262 else /* ( ch_self == ch_peer ) */
2263 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002264 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 ? -1 : 1;
2266 break;
2267 case ASB_DISCARD_LOCAL:
2268 rv = -1;
2269 break;
2270 case ASB_DISCARD_REMOTE:
2271 rv = 1;
2272 }
2273
2274 return rv;
2275}
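/*
 * Editorial note, one concrete path through the policy switch above
 * (numbers invented for illustration): with after-sb-0pri set to
 * discard-least-changes, ch_self = 12 and ch_peer = 7, the peer modified
 * fewer blocks while the connection was down, so ch_self > ch_peer gives
 * rv = 1: the peer's changes are discarded and the peer becomes the sync
 * target of this node.
 */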
2276
2277static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2278{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002279 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002280
Philipp Reisner89e58e72011-01-19 13:12:45 +01002281 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282 case ASB_DISCARD_YOUNGER_PRI:
2283 case ASB_DISCARD_OLDER_PRI:
2284 case ASB_DISCARD_LEAST_CHG:
2285 case ASB_DISCARD_LOCAL:
2286 case ASB_DISCARD_REMOTE:
2287 dev_err(DEV, "Configuration error.\n");
2288 break;
2289 case ASB_DISCONNECT:
2290 break;
2291 case ASB_CONSENSUS:
2292 hg = drbd_asb_recover_0p(mdev);
2293 if (hg == -1 && mdev->state.role == R_SECONDARY)
2294 rv = hg;
2295 if (hg == 1 && mdev->state.role == R_PRIMARY)
2296 rv = hg;
2297 break;
2298 case ASB_VIOLENTLY:
2299 rv = drbd_asb_recover_0p(mdev);
2300 break;
2301 case ASB_DISCARD_SECONDARY:
2302 return mdev->state.role == R_PRIMARY ? 1 : -1;
2303 case ASB_CALL_HELPER:
2304 hg = drbd_asb_recover_0p(mdev);
2305 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002306 enum drbd_state_rv rv2;
2307
2308 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002309 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2310 * we might be here in C_WF_REPORT_PARAMS which is transient.
2311 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002312 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2313 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002314 drbd_khelper(mdev, "pri-lost-after-sb");
2315 } else {
2316 dev_warn(DEV, "Successfully gave up primary role.\n");
2317 rv = hg;
2318 }
2319 } else
2320 rv = hg;
2321 }
2322
2323 return rv;
2324}
2325
2326static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2327{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002328 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329
Philipp Reisner89e58e72011-01-19 13:12:45 +01002330 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002331 case ASB_DISCARD_YOUNGER_PRI:
2332 case ASB_DISCARD_OLDER_PRI:
2333 case ASB_DISCARD_LEAST_CHG:
2334 case ASB_DISCARD_LOCAL:
2335 case ASB_DISCARD_REMOTE:
2336 case ASB_CONSENSUS:
2337 case ASB_DISCARD_SECONDARY:
2338 dev_err(DEV, "Configuration error.\n");
2339 break;
2340 case ASB_VIOLENTLY:
2341 rv = drbd_asb_recover_0p(mdev);
2342 break;
2343 case ASB_DISCONNECT:
2344 break;
2345 case ASB_CALL_HELPER:
2346 hg = drbd_asb_recover_0p(mdev);
2347 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002348 enum drbd_state_rv rv2;
2349
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2351 * we might be here in C_WF_REPORT_PARAMS which is transient.
2352 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002353 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2354 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355 drbd_khelper(mdev, "pri-lost-after-sb");
2356 } else {
2357 dev_warn(DEV, "Successfully gave up primary role.\n");
2358 rv = hg;
2359 }
2360 } else
2361 rv = hg;
2362 }
2363
2364 return rv;
2365}
2366
2367static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2368 u64 bits, u64 flags)
2369{
2370 if (!uuid) {
2371 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2372 return;
2373 }
2374 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2375 text,
2376 (unsigned long long)uuid[UI_CURRENT],
2377 (unsigned long long)uuid[UI_BITMAP],
2378 (unsigned long long)uuid[UI_HISTORY_START],
2379 (unsigned long long)uuid[UI_HISTORY_END],
2380 (unsigned long long)bits,
2381 (unsigned long long)flags);
2382}
2383
2384/*
2385 100 after split brain try auto recover
2386 2 C_SYNC_SOURCE set BitMap
2387 1 C_SYNC_SOURCE use BitMap
2388 0 no Sync
2389 -1 C_SYNC_TARGET use BitMap
2390 -2 C_SYNC_TARGET set BitMap
2391 -100 after split brain, disconnect
2392-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002393-1091 requires proto 91
2394-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002395 */
2396static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2397{
2398 u64 self, peer;
2399 int i, j;
2400
2401 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2402 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2403
2404 *rule_nr = 10;
2405 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2406 return 0;
2407
2408 *rule_nr = 20;
2409 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2410 peer != UUID_JUST_CREATED)
2411 return -2;
2412
2413 *rule_nr = 30;
2414 if (self != UUID_JUST_CREATED &&
2415 (peer == UUID_JUST_CREATED || peer == (u64)0))
2416 return 2;
2417
2418 if (self == peer) {
2419 int rct, dc; /* roles at crash time */
2420
2421 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2422
Philipp Reisner31890f42011-01-19 14:12:51 +01002423 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002424 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002425
2426 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2427 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2428 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2429 drbd_uuid_set_bm(mdev, 0UL);
2430
2431 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2432 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2433 *rule_nr = 34;
2434 } else {
2435 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2436 *rule_nr = 36;
2437 }
2438
2439 return 1;
2440 }
2441
2442 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2443
Philipp Reisner31890f42011-01-19 14:12:51 +01002444 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002445 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002446
2447 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2448 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2449 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2450
2451 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2452 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2453 mdev->p_uuid[UI_BITMAP] = 0UL;
2454
2455 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2456 *rule_nr = 35;
2457 } else {
2458 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2459 *rule_nr = 37;
2460 }
2461
2462 return -1;
2463 }
2464
2465 /* Common power [off|failure] */
2466 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2467 (mdev->p_uuid[UI_FLAGS] & 2);
2468 /* lowest bit is set when we were primary,
2469 * next bit (weight 2) is set when peer was primary */
2470 *rule_nr = 40;
2471
2472 switch (rct) {
2473 case 0: /* !self_pri && !peer_pri */ return 0;
2474 case 1: /* self_pri && !peer_pri */ return 1;
2475 case 2: /* !self_pri && peer_pri */ return -1;
2476 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002477 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002478 return dc ? -1 : 1;
2479 }
2480 }
2481
2482 *rule_nr = 50;
2483 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2484 if (self == peer)
2485 return -1;
2486
2487 *rule_nr = 51;
2488 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2489 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002490 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002491 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2492 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2493 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494 /* The last P_SYNC_UUID did not get through. Undo the modifications of
2495 the peer's UUIDs made for the last start of a resync as sync source. */
2496
Philipp Reisner31890f42011-01-19 14:12:51 +01002497 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002498 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002499
2500 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2501 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002502
2503 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2504 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2505
Philipp Reisnerb411b362009-09-25 16:07:19 -07002506 return -1;
2507 }
2508 }
2509
2510 *rule_nr = 60;
2511 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2512 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2513 peer = mdev->p_uuid[i] & ~((u64)1);
2514 if (self == peer)
2515 return -2;
2516 }
2517
2518 *rule_nr = 70;
2519 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2520 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2521 if (self == peer)
2522 return 1;
2523
2524 *rule_nr = 71;
2525 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2526 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002527 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002528 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2529 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2530 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531 /* The last P_SYNC_UUID did not get through. Undo the modifications of
2532 our UUIDs made for the last start of a resync as sync source. */
2533
Philipp Reisner31890f42011-01-19 14:12:51 +01002534 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002535 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002536
2537 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2538 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2539
Philipp Reisner4a23f262011-01-11 17:42:17 +01002540 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002541 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2542 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2543
2544 return 1;
2545 }
2546 }
2547
2548
2549 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002550 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2552 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2553 if (self == peer)
2554 return 2;
2555 }
2556
2557 *rule_nr = 90;
2558 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2559 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2560 if (self == peer && self != ((u64)0))
2561 return 100;
2562
2563 *rule_nr = 100;
2564 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2565 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2566 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2567 peer = mdev->p_uuid[j] & ~((u64)1);
2568 if (self == peer)
2569 return -100;
2570 }
2571 }
2572
2573 return -1000;
2574}
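/* Quick reference for the value returned above ("hg"), as consumed by
 * drbd_sync_handshake() below: positive means this node becomes sync source,
 * negative means sync target; abs(hg) >= 2 requests a full sync instead of a
 * bitmap-based one, abs(hg) == 100 signals split brain, -1000 means unrelated
 * data, and anything below -1000 encodes a required minimum protocol version
 * (e.g. -1091 -> protocol 91). */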
2575
2576/* drbd_sync_handshake() returns the new conn state on success, or
2577 C_MASK (-1) on failure.
2578 */
2579static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2580 enum drbd_disk_state peer_disk) __must_hold(local)
2581{
2582 int hg, rule_nr;
2583 enum drbd_conns rv = C_MASK;
2584 enum drbd_disk_state mydisk;
2585
2586 mydisk = mdev->state.disk;
2587 if (mydisk == D_NEGOTIATING)
2588 mydisk = mdev->new_state_tmp.disk;
2589
2590 dev_info(DEV, "drbd_sync_handshake:\n");
2591 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2592 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2593 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2594
2595 hg = drbd_uuid_compare(mdev, &rule_nr);
2596
2597 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2598
2599 if (hg == -1000) {
2600 dev_alert(DEV, "Unrelated data, aborting!\n");
2601 return C_MASK;
2602 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002603 if (hg < -1000) {
2604 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002605 return C_MASK;
2606 }
2607
2608 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2609 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2610 int f = (hg == -100) || abs(hg) == 2;
2611 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2612 if (f)
2613 hg = hg*2;
2614 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2615 hg > 0 ? "source" : "target");
2616 }
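  /* Doubling hg here (and in the split brain recovery below) turns the
   * chosen direction into a full sync request: abs(hg) >= 2 makes us set
   * and write the whole bitmap further down instead of relying on the
   * bits already in it. */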
2617
Adam Gandelman3a11a482010-04-08 16:48:23 -07002618 if (abs(hg) == 100)
2619 drbd_khelper(mdev, "initial-split-brain");
2620
Philipp Reisner89e58e72011-01-19 13:12:45 +01002621 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622 int pcount = (mdev->state.role == R_PRIMARY)
2623 + (peer_role == R_PRIMARY);
2624 int forced = (hg == -100);
2625
2626 switch (pcount) {
2627 case 0:
2628 hg = drbd_asb_recover_0p(mdev);
2629 break;
2630 case 1:
2631 hg = drbd_asb_recover_1p(mdev);
2632 break;
2633 case 2:
2634 hg = drbd_asb_recover_2p(mdev);
2635 break;
2636 }
2637 if (abs(hg) < 100) {
2638 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2639 "automatically solved. Sync from %s node\n",
2640 pcount, (hg < 0) ? "peer" : "this");
2641 if (forced) {
2642 dev_warn(DEV, "Doing a full sync, since"
2643 " UUIDs where ambiguous.\n");
2644 hg = hg*2;
2645 }
2646 }
2647 }
2648
2649 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002650 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002651 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002652 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653 hg = 1;
2654
2655 if (abs(hg) < 100)
2656 dev_warn(DEV, "Split-Brain detected, manually solved. "
2657 "Sync from %s node\n",
2658 (hg < 0) ? "peer" : "this");
2659 }
2660
2661 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002662 /* FIXME this log message is not correct if we end up here
2663 * after an attempted attach on a diskless node.
2664 * We just refuse to attach -- well, we drop the "connection"
2665 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002666 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002667 drbd_khelper(mdev, "split-brain");
2668 return C_MASK;
2669 }
2670
2671 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2672 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2673 return C_MASK;
2674 }
2675
2676 if (hg < 0 && /* by intention we do not use mydisk here. */
2677 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002678 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002679 case ASB_CALL_HELPER:
2680 drbd_khelper(mdev, "pri-lost");
2681 /* fall through */
2682 case ASB_DISCONNECT:
2683 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2684 return C_MASK;
2685 case ASB_VIOLENTLY:
2686 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2687 "assumption\n");
2688 }
2689 }
2690
Philipp Reisner89e58e72011-01-19 13:12:45 +01002691 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002692 if (hg == 0)
2693 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2694 else
2695 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2696 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2697 abs(hg) >= 2 ? "full" : "bit-map based");
2698 return C_MASK;
2699 }
2700
Philipp Reisnerb411b362009-09-25 16:07:19 -07002701 if (abs(hg) >= 2) {
2702 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002703 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2704 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002705 return C_MASK;
2706 }
2707
2708 if (hg > 0) { /* become sync source. */
2709 rv = C_WF_BITMAP_S;
2710 } else if (hg < 0) { /* become sync target */
2711 rv = C_WF_BITMAP_T;
2712 } else {
2713 rv = C_CONNECTED;
2714 if (drbd_bm_total_weight(mdev)) {
2715 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2716 drbd_bm_total_weight(mdev));
2717 }
2718 }
2719
2720 return rv;
2721}
2722
2723/* returns 1 if invalid */
2724static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2725{
2726 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2727 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2728 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2729 return 0;
2730
2731 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2732 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2733 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2734 return 1;
2735
2736 /* everything else is valid if they are equal on both sides. */
2737 if (peer == self)
2738 return 0;
2739
2740 /* everything else is invalid. */
2741 return 1;
2742}
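/* Example: our "discard-local" paired with the peer's "discard-remote" is the
 * one asymmetric combination accepted above; "discard-local" on both sides
 * would (presumably) have each node throw away its own data, so it is
 * rejected like any other mismatched discard setting. */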
2743
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002744static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2745 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002746{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002747 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002748 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002749 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002750 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2751
Philipp Reisnerb411b362009-09-25 16:07:19 -07002752 p_proto = be32_to_cpu(p->protocol);
2753 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2754 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2755 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002757 cf = be32_to_cpu(p->conn_flags);
2758 p_want_lose = cf & CF_WANT_LOSE;
2759
2760 clear_bit(CONN_DRY_RUN, &mdev->flags);
2761
2762 if (cf & CF_DRY_RUN)
2763 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002764
Philipp Reisner89e58e72011-01-19 13:12:45 +01002765 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 dev_err(DEV, "incompatible communication protocols\n");
2767 goto disconnect;
2768 }
2769
Philipp Reisner89e58e72011-01-19 13:12:45 +01002770 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2772 goto disconnect;
2773 }
2774
Philipp Reisner89e58e72011-01-19 13:12:45 +01002775 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2777 goto disconnect;
2778 }
2779
Philipp Reisner89e58e72011-01-19 13:12:45 +01002780 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002781 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2782 goto disconnect;
2783 }
2784
Philipp Reisner89e58e72011-01-19 13:12:45 +01002785 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002786 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2787 goto disconnect;
2788 }
2789
Philipp Reisner89e58e72011-01-19 13:12:45 +01002790 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2792 goto disconnect;
2793 }
2794
Philipp Reisner31890f42011-01-19 14:12:51 +01002795 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002796 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002797
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002798 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002799 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002800
2801 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2802 if (strcmp(p_integrity_alg, my_alg)) {
2803 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2804 goto disconnect;
2805 }
2806 dev_info(DEV, "data-integrity-alg: %s\n",
2807 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2808 }
2809
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002810 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002811
2812disconnect:
2813 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002814 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815}
2816
2817/* helper function
2818 * input: alg name, feature name
2819 * return: NULL (alg name was "")
2820 * ERR_PTR(error) if something goes wrong
2821 * or the crypto hash ptr, if it worked out ok. */
2822struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2823 const char *alg, const char *name)
2824{
2825 struct crypto_hash *tfm;
2826
2827 if (!alg[0])
2828 return NULL;
2829
2830 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2831 if (IS_ERR(tfm)) {
2832 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2833 alg, name, PTR_ERR(tfm));
2834 return tfm;
2835 }
2836 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2837 crypto_free_hash(tfm);
2838 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2839 return ERR_PTR(-EINVAL);
2840 }
2841 return tfm;
2842}
2843
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002844static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2845 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002846{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002847 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002848 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002849 unsigned int header_size, data_size, exp_max_sz;
2850 struct crypto_hash *verify_tfm = NULL;
2851 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002852 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002853 int *rs_plan_s = NULL;
2854 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002855
2856 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2857 : apv == 88 ? sizeof(struct p_rs_param)
2858 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002859 : apv <= 94 ? sizeof(struct p_rs_param_89)
2860 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002861
Philipp Reisner02918be2010-08-20 14:35:10 +02002862 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002864 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002865 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002866 }
2867
2868 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002869 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002870 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002871 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002872 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002873 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002874 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002875 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002876 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002877 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002878 D_ASSERT(data_size == 0);
2879 }
2880
2881 /* initialize verify_alg and csums_alg */
2882 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2883
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002884 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002885 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002886
2887 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2888
2889 if (apv >= 88) {
2890 if (apv == 88) {
2891 if (data_size > SHARED_SECRET_MAX) {
2892 dev_err(DEV, "verify-alg too long, "
2893 "peer wants %u, accepting only %u byte\n",
2894 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002895 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002896 }
2897
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002898 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002899 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
2901 /* we expect NUL terminated string */
2902 /* but just in case someone tries to be evil */
2903 D_ASSERT(p->verify_alg[data_size-1] == 0);
2904 p->verify_alg[data_size-1] = 0;
2905
2906 } else /* apv >= 89 */ {
2907 /* we still expect NUL terminated strings */
2908 /* but just in case someone tries to be evil */
2909 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2910 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2911 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2912 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2913 }
2914
2915 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2916 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2917 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2918 mdev->sync_conf.verify_alg, p->verify_alg);
2919 goto disconnect;
2920 }
2921 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2922 p->verify_alg, "verify-alg");
2923 if (IS_ERR(verify_tfm)) {
2924 verify_tfm = NULL;
2925 goto disconnect;
2926 }
2927 }
2928
2929 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2930 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2931 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2932 mdev->sync_conf.csums_alg, p->csums_alg);
2933 goto disconnect;
2934 }
2935 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2936 p->csums_alg, "csums-alg");
2937 if (IS_ERR(csums_tfm)) {
2938 csums_tfm = NULL;
2939 goto disconnect;
2940 }
2941 }
2942
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002943 if (apv > 94) {
2944 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2945 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2946 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2947 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2948 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002949
2950 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2951 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2952 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2953 if (!rs_plan_s) {
2954 dev_err(DEV, "kmalloc of fifo_buffer failed");
2955 goto disconnect;
2956 }
2957 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002958 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002959
2960 spin_lock(&mdev->peer_seq_lock);
2961 /* lock against drbd_nl_syncer_conf() */
2962 if (verify_tfm) {
2963 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2964 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2965 crypto_free_hash(mdev->verify_tfm);
2966 mdev->verify_tfm = verify_tfm;
2967 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2968 }
2969 if (csums_tfm) {
2970 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2971 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2972 crypto_free_hash(mdev->csums_tfm);
2973 mdev->csums_tfm = csums_tfm;
2974 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2975 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002976 if (fifo_size != mdev->rs_plan_s.size) {
2977 kfree(mdev->rs_plan_s.values);
2978 mdev->rs_plan_s.values = rs_plan_s;
2979 mdev->rs_plan_s.size = fifo_size;
2980 mdev->rs_planed = 0;
2981 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002982 spin_unlock(&mdev->peer_seq_lock);
2983 }
2984
2985 return ok;
2986disconnect:
2987 /* just for completeness: actually not needed,
2988 * as this is not reached if csums_tfm was ok. */
2989 crypto_free_hash(csums_tfm);
2990 /* but free the verify_tfm again, if csums_tfm did not work out */
2991 crypto_free_hash(verify_tfm);
2992 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002993 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002994}
2995
Philipp Reisnerb411b362009-09-25 16:07:19 -07002996/* warn if the arguments differ by more than 12.5% */
2997static void warn_if_differ_considerably(struct drbd_conf *mdev,
2998 const char *s, sector_t a, sector_t b)
2999{
3000 sector_t d;
3001 if (a == 0 || b == 0)
3002 return;
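  /* "considerable" means more than one eighth (12.5%) of either value:
   * e.g. 1000 vs 960 sectors stays silent, 1000 vs 850 triggers the warning. */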
3003 d = (a > b) ? (a - b) : (b - a);
3004 if (d > (a>>3) || d > (b>>3))
3005 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3006 (unsigned long long)a, (unsigned long long)b);
3007}
3008
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003009static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3010 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003012 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014 sector_t p_size, p_usize, my_usize;
3015 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003016 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017
Philipp Reisnerb411b362009-09-25 16:07:19 -07003018 p_size = be64_to_cpu(p->d_size);
3019 p_usize = be64_to_cpu(p->u_size);
3020
3021 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3022 dev_err(DEV, "some backing storage is needed\n");
3023 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003024 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 }
3026
3027 /* just store the peer's disk size for now.
3028 * we still need to figure out whether we accept that. */
3029 mdev->p_size = p_size;
3030
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031 if (get_ldev(mdev)) {
3032 warn_if_differ_considerably(mdev, "lower level device sizes",
3033 p_size, drbd_get_max_capacity(mdev->ldev));
3034 warn_if_differ_considerably(mdev, "user requested size",
3035 p_usize, mdev->ldev->dc.disk_size);
3036
3037 /* if this is the first connect, or an otherwise expected
3038 * param exchange, choose the minimum */
3039 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3040 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3041 p_usize);
3042
3043 my_usize = mdev->ldev->dc.disk_size;
3044
3045 if (mdev->ldev->dc.disk_size != p_usize) {
3046 mdev->ldev->dc.disk_size = p_usize;
3047 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3048 (unsigned long)mdev->ldev->dc.disk_size);
3049 }
3050
3051 /* Never shrink a device with usable data during connect.
3052 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003053 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003054 drbd_get_capacity(mdev->this_bdev) &&
3055 mdev->state.disk >= D_OUTDATED &&
3056 mdev->state.conn < C_CONNECTED) {
3057 dev_err(DEV, "The peer's disk size is too small!\n");
3058 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3059 mdev->ldev->dc.disk_size = my_usize;
3060 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003061 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 }
3063 put_ldev(mdev);
3064 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065
Philipp Reisnere89b5912010-03-24 17:11:33 +01003066 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003067 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003068 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003069 put_ldev(mdev);
3070 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003071 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072 drbd_md_sync(mdev);
3073 } else {
3074 /* I am diskless, need to accept the peer's size. */
3075 drbd_set_my_capacity(mdev, p_size);
3076 }
3077
Philipp Reisner99432fc2011-05-20 16:39:13 +02003078 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3079 drbd_reconsider_max_bio_size(mdev);
3080
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 if (get_ldev(mdev)) {
3082 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3083 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3084 ldsc = 1;
3085 }
3086
Philipp Reisnerb411b362009-09-25 16:07:19 -07003087 put_ldev(mdev);
3088 }
3089
3090 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3091 if (be64_to_cpu(p->c_size) !=
3092 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3093 /* we have different sizes, probably peer
3094 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003095 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 }
3097 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3098 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3099 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003100 mdev->state.disk >= D_INCONSISTENT) {
3101 if (ddsf & DDSF_NO_RESYNC)
3102 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3103 else
3104 resync_after_online_grow(mdev);
3105 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3107 }
3108 }
3109
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003110 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111}
3112
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003113static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3114 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003116 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003118 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3121
3122 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3123 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3124
3125 kfree(mdev->p_uuid);
3126 mdev->p_uuid = p_uuid;
3127
3128 if (mdev->state.conn < C_CONNECTED &&
3129 mdev->state.disk < D_INCONSISTENT &&
3130 mdev->state.role == R_PRIMARY &&
3131 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3132 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3133 (unsigned long long)mdev->ed_uuid);
3134 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003135 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003136 }
3137
3138 if (get_ldev(mdev)) {
3139 int skip_initial_sync =
3140 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003141 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3143 (p_uuid[UI_FLAGS] & 8);
3144 if (skip_initial_sync) {
3145 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3146 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003147 "clear_n_write from receive_uuids",
3148 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003149 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3150 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3151 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3152 CS_VERBOSE, NULL);
3153 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003154 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003155 }
3156 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003157 } else if (mdev->state.disk < D_INCONSISTENT &&
3158 mdev->state.role == R_PRIMARY) {
3159 /* I am a diskless primary, the peer just created a new current UUID
3160 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003161 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003162 }
3163
3164 /* Before we test for the disk state, we should wait until any cluster
3165 wide state change that may be in progress has finished. That is important if
3166 we are primary and are detaching from our disk. We need to see the
3167 new disk state... */
3168 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3169 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003170 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3171
3172 if (updated_uuids)
3173 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003174
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003175 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176}
3177
3178/**
3179 * convert_state() - Converts the peer's view of the cluster state to our point of view
3180 * @ps: The state as seen by the peer.
3181 */
3182static union drbd_state convert_state(union drbd_state ps)
3183{
3184 union drbd_state ms;
3185
3186 static enum drbd_conns c_tab[] = {
3187 [C_CONNECTED] = C_CONNECTED,
3188
3189 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3190 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3191 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3192 [C_VERIFY_S] = C_VERIFY_T,
3193 [C_MASK] = C_MASK,
3194 };
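  /* Roles and disk states are simply mirrored below: e.g. a peer reporting
   * "I am Primary, my disk is UpToDate, yours is Inconsistent" becomes,
   * from our point of view, peer == Primary, pdsk == UpToDate and
   * disk == Inconsistent. */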
3195
3196 ms.i = ps.i;
3197
3198 ms.conn = c_tab[ps.conn];
3199 ms.peer = ps.role;
3200 ms.role = ps.peer;
3201 ms.pdsk = ps.disk;
3202 ms.disk = ps.pdsk;
3203 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3204
3205 return ms;
3206}
3207
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003208static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3209 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003211 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003213 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214
Philipp Reisnerb411b362009-09-25 16:07:19 -07003215 mask.i = be32_to_cpu(p->mask);
3216 val.i = be32_to_cpu(p->val);
3217
Philipp Reisner25703f82011-02-07 14:35:25 +01003218 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003219 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3220 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003221 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222 }
3223
3224 mask = convert_state(mask);
3225 val = convert_state(val);
3226
3227 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3228
3229 drbd_send_sr_reply(mdev, rv);
3230 drbd_md_sync(mdev);
3231
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003232 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003233}
3234
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003235static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3236 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003238 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003239 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003241 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003242 int rv;
3243
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 peer_state.i = be32_to_cpu(p->state);
3245
3246 real_peer_disk = peer_state.disk;
3247 if (peer_state.disk == D_NEGOTIATING) {
3248 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3249 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3250 }
3251
Philipp Reisner87eeee42011-01-19 14:16:30 +01003252 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003254 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003255 spin_unlock_irq(&mdev->tconn->req_lock);
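  /* The state is sampled under req_lock, but the lock is dropped for the
   * evaluation below (drbd_sync_handshake() and friends may sleep); if our
   * state changed in the meantime, the check after re-taking the lock
   * sends us back to the retry label above. */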
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003257 /* peer says his disk is uptodate, while we think it is inconsistent,
3258 * and this happens while we think we have a sync going on. */
3259 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3260 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3261 /* If we are (becoming) SyncSource, but peer is still in sync
3262 * preparation, ignore its uptodate-ness to avoid flapping, it
3263 * will change to inconsistent once the peer reaches active
3264 * syncing states.
3265 * It may have changed syncer-paused flags, however, so we
3266 * cannot ignore this completely. */
3267 if (peer_state.conn > C_CONNECTED &&
3268 peer_state.conn < C_SYNC_SOURCE)
3269 real_peer_disk = D_INCONSISTENT;
3270
3271 /* if peer_state changes to connected at the same time,
3272 * it explicitly notifies us that it finished resync.
3273 * Maybe we should finish it up, too? */
3274 else if (os.conn >= C_SYNC_SOURCE &&
3275 peer_state.conn == C_CONNECTED) {
3276 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3277 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003278 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003279 }
3280 }
3281
3282 /* peer says his disk is inconsistent, while we think it is uptodate,
3283 * and this happens while the peer still thinks we have a sync going on,
3284 * but we think we are already done with the sync.
3285 * We ignore this to avoid flapping pdsk.
3286 * This should not happen, if the peer is a recent version of drbd. */
3287 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3288 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3289 real_peer_disk = D_UP_TO_DATE;
3290
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003291 if (ns.conn == C_WF_REPORT_PARAMS)
3292 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003293
Philipp Reisner67531712010-10-27 12:21:30 +02003294 if (peer_state.conn == C_AHEAD)
3295 ns.conn = C_BEHIND;
3296
Philipp Reisnerb411b362009-09-25 16:07:19 -07003297 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3298 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3299 int cr; /* consider resync */
3300
3301 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003302 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303 /* if we had an established connection
3304 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003305 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003306 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003307 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003308 /* if we have both been inconsistent, and the peer has been
3309 * forced to be UpToDate with --overwrite-data */
3310 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3311 /* if we had been plain connected, and the admin requested to
3312 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003313 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003314 (peer_state.conn >= C_STARTING_SYNC_S &&
3315 peer_state.conn <= C_WF_BITMAP_T));
3316
3317 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003318 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319
3320 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003321 if (ns.conn == C_MASK) {
3322 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003324 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003325 } else if (peer_state.disk == D_NEGOTIATING) {
3326 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3327 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003328 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003329 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003330 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003331 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003332 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003333 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003334 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003335 }
3336 }
3337 }
3338
Philipp Reisner87eeee42011-01-19 14:16:30 +01003339 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003340 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341 goto retry;
3342 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003343 ns.peer = peer_state.role;
3344 ns.pdsk = real_peer_disk;
3345 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003346 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003348 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3349 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003350 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003351 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003352 for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003353 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003354 dev_err(DEV, "Aborting Connect, can not thaw IO with a peer that is only Consistent\n");
3355 tl_clear(mdev);
3356 drbd_uuid_new_current(mdev);
3357 clear_bit(NEW_CUR_UUID, &mdev->flags);
3358 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003359 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003360 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003361 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003362 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003363 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003364
3365 if (rv < SS_SUCCESS) {
3366 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003367 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368 }
3369
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003370 if (os.conn > C_WF_REPORT_PARAMS) {
3371 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003372 peer_state.disk != D_NEGOTIATING ) {
3373 /* we want resync, peer has not yet decided to sync... */
3374 /* Nowadays only used when forcing a node into primary role and
3375 setting its disk to UpToDate with that */
3376 drbd_send_uuids(mdev);
3377 drbd_send_state(mdev);
3378 }
3379 }
3380
Philipp Reisner89e58e72011-01-19 13:12:45 +01003381 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003382
3383 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3384
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003385 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003386}
3387
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003388static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3389 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003391 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003392
3393 wait_event(mdev->misc_wait,
3394 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003395 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003396 mdev->state.conn < C_CONNECTED ||
3397 mdev->state.disk < D_NEGOTIATING);
3398
3399 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3400
Philipp Reisnerb411b362009-09-25 16:07:19 -07003401 /* Here the _drbd_uuid_ functions are right, current should
3402 _not_ be rotated into the history */
3403 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3404 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3405 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3406
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003407 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003408 drbd_start_resync(mdev, C_SYNC_TARGET);
3409
3410 put_ldev(mdev);
3411 } else
3412 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3413
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003414 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415}
3416
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003417/**
3418 * receive_bitmap_plain
3419 *
3420 * Return 0 when done, 1 when another iteration is needed, and a negative error
3421 * code upon failure.
3422 */
3423static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003424receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3425 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003426{
3427 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3428 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003429 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003430
Philipp Reisner02918be2010-08-20 14:35:10 +02003431 if (want != data_size) {
3432 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003433 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003434 }
3435 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003436 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003437 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003438 if (err != want) {
3439 if (err >= 0)
3440 err = -EIO;
3441 return err;
3442 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443
3444 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3445
3446 c->word_offset += num_words;
3447 c->bit_offset = c->word_offset * BITS_PER_LONG;
3448 if (c->bit_offset > c->bm_bits)
3449 c->bit_offset = c->bm_bits;
3450
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003451 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003452}
3453
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003454/**
3455 * recv_bm_rle_bits
3456 *
3457 * Return 0 when done, 1 when another iteration is needed, and a negative error
3458 * code upon failure.
3459 */
3460static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003461recv_bm_rle_bits(struct drbd_conf *mdev,
3462 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003463 struct bm_xfer_ctx *c,
3464 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003465{
3466 struct bitstream bs;
3467 u64 look_ahead;
3468 u64 rl;
3469 u64 tmp;
3470 unsigned long s = c->bit_offset;
3471 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472 int toggle = DCBP_get_start(p);
3473 int have;
3474 int bits;
3475
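  /* The payload is a sequence of VLI (variable length integer) encoded run
   * lengths; consecutive runs alternate between clear and set bits, and
   * DCBP_get_start() tells whether the very first run describes set bits.
   * Only the "set" runs are applied to the bitmap below. */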
3476 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3477
3478 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3479 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003480 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481
3482 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3483 bits = vli_decode_bits(&rl, look_ahead);
3484 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003485 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003486
3487 if (toggle) {
3488 e = s + rl -1;
3489 if (e >= c->bm_bits) {
3490 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003491 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003492 }
3493 _drbd_bm_set_bits(mdev, s, e);
3494 }
3495
3496 if (have < bits) {
3497 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3498 have, bits, look_ahead,
3499 (unsigned int)(bs.cur.b - p->code),
3500 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003501 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003502 }
3503 look_ahead >>= bits;
3504 have -= bits;
3505
3506 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3507 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003508 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003509 look_ahead |= tmp << have;
3510 have += bits;
3511 }
3512
3513 c->bit_offset = s;
3514 bm_xfer_ctx_bit_to_word_offset(c);
3515
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003516 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003517}
3518
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003519/**
3520 * decode_bitmap_c
3521 *
3522 * Return 0 when done, 1 when another iteration is needed, and a negative error
3523 * code upon failure.
3524 */
3525static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003526decode_bitmap_c(struct drbd_conf *mdev,
3527 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003528 struct bm_xfer_ctx *c,
3529 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003530{
3531 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003532 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003533
3534 /* other variants had been implemented for evaluation,
3535 * but have been dropped as this one turned out to be "best"
3536 * during all our tests. */
3537
3538 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3539 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003540 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541}
3542
3543void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3544 const char *direction, struct bm_xfer_ctx *c)
3545{
3546 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003547 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3549 + c->bm_words * sizeof(long);
3550 unsigned total = c->bytes[0] + c->bytes[1];
3551 unsigned r;
3552
3553 /* total can not be zero. but just in case: */
3554 if (total == 0)
3555 return;
3556
3557 /* don't report if not compressed */
3558 if (total >= plain)
3559 return;
3560
3561 /* total < plain. check for overflow, still */
3562 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3563 : (1000 * total / plain);
3564
3565 if (r > 1000)
3566 r = 1000;
3567
3568 r = 1000 - r;
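  /* r is now the per-mille saving; e.g. plain == 4000 bytes and
   * total == 1000 bytes gives r = 1000 - 250 = 750, reported below
   * as "compression: 75.0%". */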
3569 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3570 "total %u; compression: %u.%u%%\n",
3571 direction,
3572 c->bytes[1], c->packets[1],
3573 c->bytes[0], c->packets[0],
3574 total, r/10, r % 10);
3575}
3576
3577/* Since we are processing the bitfield from lower addresses to higher,
3578 it does not matter if we process it in 32 bit chunks or 64 bit
3579 chunks as long as it is little endian. (Understand it as byte stream,
3580 beginning with the lowest byte...) If we used big endian
3581 we would need to process it from the highest address to the lowest,
3582 in order to be agnostic to the 32 vs 64 bits issue.
3583
3584 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003585static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3586 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587{
3588 struct bm_xfer_ctx c;
3589 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003590 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003591 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003592 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003593 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003594
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003595 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3596 /* you are supposed to send additional out-of-sync information
3597 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598
3599 /* maybe we should use some per thread scratch page,
3600 * and allocate that during initial device creation? */
3601 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3602 if (!buffer) {
3603 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3604 goto out;
3605 }
3606
3607 c = (struct bm_xfer_ctx) {
3608 .bm_bits = drbd_bm_bits(mdev),
3609 .bm_words = drbd_bm_words(mdev),
3610 };
3611
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003612 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003613 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003614 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003615 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003616 /* MAYBE: sanity check that we speak proto >= 90,
3617 * and the feature is enabled! */
3618 struct p_compressed_bm *p;
3619
Philipp Reisner02918be2010-08-20 14:35:10 +02003620 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621 dev_err(DEV, "ReportCBitmap packet too large\n");
3622 goto out;
3623 }
3624 /* use the page buff */
3625 p = buffer;
3626 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003627 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003628 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003629 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3630 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003631 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003632 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003633 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003635 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003636 goto out;
3637 }
3638
Philipp Reisner02918be2010-08-20 14:35:10 +02003639 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003640 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003642 if (err <= 0) {
3643 if (err < 0)
3644 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003646 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003647 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003648 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003649 cmd = pi.cmd;
3650 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003651 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003652
3653 INFO_bm_xfer_stats(mdev, "receive", &c);
3654
3655 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003656 enum drbd_state_rv rv;
3657
Philipp Reisnerb411b362009-09-25 16:07:19 -07003658 ok = !drbd_send_bitmap(mdev);
3659 if (!ok)
3660 goto out;
3661 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003662 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3663 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003664 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3665 /* admin may have requested C_DISCONNECTING,
3666 * other threads may have noticed network errors */
3667 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3668 drbd_conn_str(mdev->state.conn));
3669 }
3670
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003671 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003673 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3675 drbd_start_resync(mdev, C_SYNC_SOURCE);
3676 free_page((unsigned long) buffer);
3677 return ok;
3678}
3679
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003680static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3681 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682{
3683 /* TODO zero copy sink :) */
3684 static char sink[128];
3685 int size, want, r;
3686
Philipp Reisner02918be2010-08-20 14:35:10 +02003687 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3688 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003689
Philipp Reisner02918be2010-08-20 14:35:10 +02003690 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691 while (size > 0) {
3692 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003693 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003694 if (!expect(r > 0))
3695 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696 size -= r;
3697 }
3698 return size == 0;
3699}
3700
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003701static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3702 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003704 /* Make sure we've acked all the TCP data associated
3705 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003706 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003707
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003708 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709}
3710
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003711static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3712 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003713{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003714 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003715
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003716 switch (mdev->state.conn) {
3717 case C_WF_SYNC_UUID:
3718 case C_WF_BITMAP_T:
3719 case C_BEHIND:
3720 break;
3721 default:
3722 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3723 drbd_conn_str(mdev->state.conn));
3724 }
3725
Philipp Reisner73a01a12010-10-27 14:33:00 +02003726 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3727
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003728 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003729}
3730
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003731typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
3732 unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733
Philipp Reisner02918be2010-08-20 14:35:10 +02003734struct data_cmd {
3735 int expect_payload;
3736 size_t pkt_size;
3737 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738};
3739
Philipp Reisner02918be2010-08-20 14:35:10 +02003740static struct data_cmd drbd_cmd_handler[] = {
3741 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3742 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
 3743	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply },
 3744	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003745	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap },
 3746	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap },
3747 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003748 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3749 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003750 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3751 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003752 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3753 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3754 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3755 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3756 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3757 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3758 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3759 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3760 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3761 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003762 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003763 /* anything missing from this table is in
3764 * the asender_tbl, see get_asender_cmd */
3765 [P_MAX_CMD] = { 0, 0, NULL },
3766};
3767
 3768/* All handler functions that expect a sub-header get that sub-header in
Philipp Reisnere42325a2011-01-19 13:55:45 +01003769 mdev->tconn->data.rbuf.header.head.payload.
Philipp Reisner02918be2010-08-20 14:35:10 +02003770
Philipp Reisnere42325a2011-01-19 13:55:45 +01003771 Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
Philipp Reisner02918be2010-08-20 14:35:10 +02003772   p_header, but they may not rely on that, since there is also p_header95.
3773 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003774
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003775static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003777 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003778 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003779 size_t shs; /* sub header size */
3780 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003781
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003782 while (get_t_state(&tconn->receiver) == RUNNING) {
3783 drbd_thread_current_set_cpu(&tconn->receiver);
3784 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003785 goto err_out;
3786
Philipp Reisner77351055b2011-02-07 17:24:26 +01003787 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003788 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003789 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003790 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791
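		/* pkt_size covers the common header plus the fixed sub-header;
		 * shs is what still has to be read before the handler runs,
		 * the handler then consumes the remaining pi.size - shs bytes */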
Philipp Reisner77351055b2011-02-07 17:24:26 +01003792 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3793 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003794 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003795 goto err_out;
3796 }
3797
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003798 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003799 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003800 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003801 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003802 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003803 goto err_out;
3804 }
3805 }
3806
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003807 rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003808
3809 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003810 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003811 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003812 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003813 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003815
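	/* the if (0) keeps err_out off the normal path; it is reachable only
	 * through the gotos above */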
Philipp Reisner02918be2010-08-20 14:35:10 +02003816 if (0) {
3817 err_out:
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003818 drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003819 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820}
3821
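/* Queue a barrier work item and wait until the worker completes it; every
 * work item that was queued before it has then been processed as well. */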
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003822void drbd_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003823{
3824 struct drbd_wq_barrier barr;
3825
3826 barr.w.cb = w_prev_work_done;
3827 init_completion(&barr.done);
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003828 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 wait_for_completion(&barr.done);
3830}
3831
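/* Tear down everything belonging to the current connection: stop the asender,
 * drain the ee lists, cancel resync bookkeeping and, unless we are
 * C_DISCONNECTING, fall back to C_UNCONNECTED so a new attempt can start. */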
3832static void drbd_disconnect(struct drbd_conf *mdev)
3833{
3834 enum drbd_fencing_p fp;
3835 union drbd_state os, ns;
3836 int rv = SS_UNKNOWN_ERROR;
3837 unsigned int i;
3838
3839 if (mdev->state.conn == C_STANDALONE)
3840 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841
3842 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003843 drbd_thread_stop(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844 drbd_free_sock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003845
Philipp Reisner85719572010-07-21 10:20:17 +02003846 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003847 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3849 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3850 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003851 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003852
3853 /* We do not have data structures that would allow us to
3854 * get the rs_pending_cnt down to 0 again.
3855 * * On C_SYNC_TARGET we do not have any data structures describing
3856 * the pending RSDataRequest's we have sent.
3857 * * On C_SYNC_SOURCE there is no data structure that tracks
3858 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3859 * And no, it is not the sum of the reference counts in the
3860 * resync_LRU. The resync_LRU tracks the whole operation including
3861 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3862 * on the fly. */
3863 drbd_rs_cancel_all(mdev);
3864 mdev->rs_total = 0;
3865 mdev->rs_failed = 0;
3866 atomic_set(&mdev->rs_pending_cnt, 0);
3867 wake_up(&mdev->misc_wait);
3868
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003869 del_timer(&mdev->request_timer);
3870
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871 /* make sure syncer is stopped and w_resume_next_sg queued */
3872 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873 resync_timer_fn((unsigned long)mdev);
3874
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3876 * w_make_resync_request etc. which may still be on the worker queue
3877 * to be "canceled" */
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003878 drbd_flush_workqueue(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003879
3880 /* This also does reclaim_net_ee(). If we do this too early, we might
3881 * miss some resync ee and pages.*/
3882 drbd_process_done_ee(mdev);
3883
3884 kfree(mdev->p_uuid);
3885 mdev->p_uuid = NULL;
3886
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003887 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003888 tl_clear(mdev);
3889
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890 dev_info(DEV, "Connection closed\n");
3891
3892 drbd_md_sync(mdev);
3893
3894 fp = FP_DONT_CARE;
3895 if (get_ldev(mdev)) {
3896 fp = mdev->ldev->dc.fencing;
3897 put_ldev(mdev);
3898 }
3899
Philipp Reisner87f7be42010-06-11 13:56:33 +02003900 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3901 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902
Philipp Reisner87eeee42011-01-19 14:16:30 +01003903 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904 os = mdev->state;
3905 if (os.conn >= C_UNCONNECTED) {
3906 /* Do not restart in case we are C_DISCONNECTING */
3907 ns = os;
3908 ns.conn = C_UNCONNECTED;
3909 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3910 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003911 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003912
3913 if (os.conn == C_DISCONNECTING) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01003914 wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915
Philipp Reisnera0638452011-01-19 14:31:32 +01003916 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
3917 mdev->tconn->cram_hmac_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003918
Philipp Reisner89e58e72011-01-19 13:12:45 +01003919 kfree(mdev->tconn->net_conf);
3920 mdev->tconn->net_conf = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003921 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3922 }
3923
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003924 /* serialize with bitmap writeout triggered by the state change,
3925 * if any. */
3926 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3927
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928 /* tcp_close and release of sendpage pages can be deferred. I don't
3929 * want to use SO_LINGER, because apparently it can be deferred for
3930 * more than 20 seconds (longest time I checked).
3931 *
3932 * Actually we don't care for exactly when the network stack does its
3933 * put_page(), but release our reference on these pages right here.
3934 */
3935 i = drbd_release_ee(mdev, &mdev->net_ee);
3936 if (i)
3937 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003938 i = atomic_read(&mdev->pp_in_use_by_net);
3939 if (i)
3940 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003941 i = atomic_read(&mdev->pp_in_use);
3942 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003943 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003944
3945 D_ASSERT(list_empty(&mdev->read_ee));
3946 D_ASSERT(list_empty(&mdev->active_ee));
3947 D_ASSERT(list_empty(&mdev->sync_ee));
3948 D_ASSERT(list_empty(&mdev->done_ee));
3949
3950 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3951 atomic_set(&mdev->current_epoch->epoch_size, 0);
3952 D_ASSERT(list_empty(&mdev->current_epoch->list));
3953}
3954
3955/*
3956 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3957 * we can agree on is stored in agreed_pro_version.
3958 *
3959 * feature flags and the reserved array should be enough room for future
3960 * enhancements of the handshake protocol, and possible plugins...
3961 *
 3962 * for now, they are expected to be zero, and are in any case ignored.
3963 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003964static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003966 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003967 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968 int ok;
3969
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003970 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3971 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003972 return 0; /* interrupted. not ok. */
3973 }
3974
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003975 if (tconn->data.socket == NULL) {
3976 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977 return 0;
3978 }
3979
3980 memset(p, 0, sizeof(*p));
3981 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3982 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003983 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3984 &p->head, sizeof(*p), 0);
3985 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986 return ok;
3987}
3988
3989/*
3990 * return values:
3991 * 1 yes, we have a valid connection
3992 * 0 oops, did not work out, please try again
3993 * -1 peer talks different language,
3994 * no point in trying again, please go standalone.
3995 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01003996static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003997{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01003998 /* ASSERT current == tconn->receiver ... */
3999 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004000 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004001 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002 int rv;
4003
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004004 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005 if (!rv)
4006 return 0;
4007
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004008 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 if (!rv)
4010 return 0;
4011
Philipp Reisner77351055b2011-02-07 17:24:26 +01004012 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004013 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004014 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004015 return -1;
4016 }
4017
Philipp Reisner77351055b2011-02-07 17:24:26 +01004018 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004019 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004020 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021 return -1;
4022 }
4023
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004024 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025
4026 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004027 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004028 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004029 return 0;
4030 }
4031
Philipp Reisnerb411b362009-09-25 16:07:19 -07004032 p->protocol_min = be32_to_cpu(p->protocol_min);
4033 p->protocol_max = be32_to_cpu(p->protocol_max);
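	/* a peer that reports protocol_max == 0 apparently supports exactly
	 * protocol_min and nothing newer, so use that as its maximum as well */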
4034 if (p->protocol_max == 0)
4035 p->protocol_max = p->protocol_min;
4036
4037 if (PRO_VERSION_MAX < p->protocol_min ||
4038 PRO_VERSION_MIN > p->protocol_max)
4039 goto incompat;
4040
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004041 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004042
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004043 conn_info(tconn, "Handshake successful: "
4044 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004045
4046 return 1;
4047
4048 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004049 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 "I support %d-%d, peer supports %d-%d\n",
4051 PRO_VERSION_MIN, PRO_VERSION_MAX,
4052 p->protocol_min, p->protocol_max);
4053 return -1;
4054}
4055
4056#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004057static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058{
 4059	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
 4060	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004061 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004062}
4063#else
4064#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004065
4066/* Return value:
4067 1 - auth succeeded,
4068 0 - failed, try again (network error),
4069 -1 - auth failed, don't try again.
4070*/
4071
Philipp Reisner13e60372011-02-08 09:54:40 +01004072static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004073{
4074 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4075 struct scatterlist sg;
4076 char *response = NULL;
4077 char *right_response = NULL;
4078 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004079 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004080 unsigned int resp_size;
4081 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004082 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004083 int rv;
4084
Philipp Reisner13e60372011-02-08 09:54:40 +01004085 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 desc.flags = 0;
4087
Philipp Reisner13e60372011-02-08 09:54:40 +01004088 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4089 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004090 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004091 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004092 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093 goto fail;
4094 }
4095
4096 get_random_bytes(my_challenge, CHALLENGE_LEN);
4097
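	/* challenge-response in both directions: send our random challenge,
	 * receive the peer's, answer it with HMAC(secret, peers_ch), and
	 * finally check the peer's answer against HMAC(secret, my_challenge) */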
Philipp Reisner13e60372011-02-08 09:54:40 +01004098 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099 if (!rv)
4100 goto fail;
4101
Philipp Reisner13e60372011-02-08 09:54:40 +01004102 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103 if (!rv)
4104 goto fail;
4105
Philipp Reisner77351055b2011-02-07 17:24:26 +01004106 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004107 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004108 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004109 rv = 0;
4110 goto fail;
4111 }
4112
Philipp Reisner77351055b2011-02-07 17:24:26 +01004113 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004114		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004115 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116 goto fail;
4117 }
4118
Philipp Reisner77351055b2011-02-07 17:24:26 +01004119 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004121 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004122 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004123 goto fail;
4124 }
4125
Philipp Reisner13e60372011-02-08 09:54:40 +01004126 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004127
Philipp Reisner77351055b2011-02-07 17:24:26 +01004128 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004129 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004130 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004131 rv = 0;
4132 goto fail;
4133 }
4134
Philipp Reisner13e60372011-02-08 09:54:40 +01004135 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004136 response = kmalloc(resp_size, GFP_NOIO);
4137 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004138 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004139 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140 goto fail;
4141 }
4142
4143 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004144 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004145
4146 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4147 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004148 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004149 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004150 goto fail;
4151 }
4152
Philipp Reisner13e60372011-02-08 09:54:40 +01004153 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004154 if (!rv)
4155 goto fail;
4156
Philipp Reisner13e60372011-02-08 09:54:40 +01004157 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158 if (!rv)
4159 goto fail;
4160
Philipp Reisner77351055b2011-02-07 17:24:26 +01004161 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004162 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004163 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164 rv = 0;
4165 goto fail;
4166 }
4167
Philipp Reisner77351055b2011-02-07 17:24:26 +01004168 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004169		conn_err(tconn, "AuthResponse payload has wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170 rv = 0;
4171 goto fail;
4172 }
4173
Philipp Reisner13e60372011-02-08 09:54:40 +01004174	rv = drbd_recv(tconn, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004175
4176 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004177 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004178 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004179 rv = 0;
4180 goto fail;
4181 }
4182
4183 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004184 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004185 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004186 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004187 goto fail;
4188 }
4189
4190 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4191
4192 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4193 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004194 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004195 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004196 goto fail;
4197 }
4198
4199 rv = !memcmp(response, right_response, resp_size);
4200
4201 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004202 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4203 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004204 else
4205 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004206
4207 fail:
4208 kfree(peers_ch);
4209 kfree(response);
4210 kfree(right_response);
4211
4212 return rv;
4213}
4214#endif
4215
4216int drbdd_init(struct drbd_thread *thi)
4217{
4218 struct drbd_conf *mdev = thi->mdev;
4219 unsigned int minor = mdev_to_minor(mdev);
4220 int h;
4221
4222 sprintf(current->comm, "drbd%d_receiver", minor);
4223
4224 dev_info(DEV, "receiver (re)started\n");
4225
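	/* retry the connect once a second as long as it merely fails (h == 0);
	 * h == -1 means there is no point in retrying (e.g. the peer speaks an
	 * incompatible dialect or authentication failed), h > 0 means we have
	 * a usable connection */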
4226 do {
Philipp Reisner907599e2011-02-08 11:25:37 +01004227 h = drbd_connect(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228 if (h == 0) {
4229 drbd_disconnect(mdev);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004230 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231 }
4232 if (h == -1) {
4233 dev_warn(DEV, "Discarding network configuration.\n");
4234 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4235 }
4236 } while (h == 0);
4237
4238 if (h > 0) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004239 if (get_net_conf(mdev->tconn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004240 drbdd(mdev->tconn);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004241 put_net_conf(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242 }
4243 }
4244
4245 drbd_disconnect(mdev);
4246
4247 dev_info(DEV, "receiver terminated\n");
4248 return 0;
4249}
4250
4251/* ********* acknowledge sender ******** */
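/* The asender thread owns the meta socket: it sends pings, flushes done_ee
 * and dispatches the small ACK-class packets to the got_*() handlers below,
 * see get_asender_cmd() and drbd_asender(). */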
4252
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004253static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004255 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256
4257 int retcode = be32_to_cpu(p->retcode);
4258
4259 if (retcode >= SS_SUCCESS) {
4260 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4261 } else {
4262 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4263 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4264 drbd_set_st_err_str(retcode), retcode);
4265 }
4266 wake_up(&mdev->state_wait);
4267
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004268 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269}
4270
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004271static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272{
4273 return drbd_send_ping_ack(mdev);
4274
4275}
4276
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004277static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278{
4279 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004280 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004281 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4282 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004284 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285}
4286
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004287static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004289 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290 sector_t sector = be64_to_cpu(p->sector);
4291 int blksize = be32_to_cpu(p->blksize);
4292
Philipp Reisner31890f42011-01-19 14:12:51 +01004293 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004294
4295 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4296
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004297 if (get_ldev(mdev)) {
4298 drbd_rs_complete_io(mdev, sector);
4299 drbd_set_in_sync(mdev, sector, blksize);
4300 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4301 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4302 put_ldev(mdev);
4303 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004305 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004307 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308}
4309
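/* Look up the request identified by id/sector in the given tree, feed the
 * event 'what' into its state machine under req_lock, and complete the master
 * bio, if that finished it, after dropping the lock. Returns false if no
 * matching request is found. */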
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004310static int
4311validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4312 struct rb_root *root, const char *func,
4313 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314{
4315 struct drbd_request *req;
4316 struct bio_and_error m;
4317
Philipp Reisner87eeee42011-01-19 14:16:30 +01004318 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004319 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004321 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004322 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323 }
4324 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004325 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004326
4327 if (m.bio)
4328 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004329 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330}
4331
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004332static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004334 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 sector_t sector = be64_to_cpu(p->sector);
4336 int blksize = be32_to_cpu(p->blksize);
4337 enum drbd_req_event what;
4338
4339 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4340
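	/* ID_SYNCER marks the ack for a resync write we generated ourselves:
	 * there is no application request to look up, only the bitmap and the
	 * pending-resync counter need to be updated */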
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004341 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342 drbd_set_in_sync(mdev, sector, blksize);
4343 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004344 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004346 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004348 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004349 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004350 break;
4351 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004352 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004353 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 break;
4355 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004356 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004357 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 break;
4359 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004360 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004361 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362 break;
4363 default:
4364 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004365 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366 }
4367
4368 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004369 &mdev->write_requests, __func__,
4370 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371}
4372
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004373static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004375 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004377 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004378 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4379 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004380 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004381
4382 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4383
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004384 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004385 dec_rs_pending(mdev);
4386 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004387 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004388 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004389
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004390 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004391 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004392 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004393 if (!found) {
4394 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4395 The master bio might already be completed, therefore the
4396 request is no longer in the collision hash. */
4397 /* In Protocol B we might already have got a P_RECV_ACK
4398 but then get a P_NEG_ACK afterwards. */
4399 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004400 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004401 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004402 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004403 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004404}
4405
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004406static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004408 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409 sector_t sector = be64_to_cpu(p->sector);
4410
4411 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4412 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4413 (unsigned long long)sector, be32_to_cpu(p->blksize));
4414
4415 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004416 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004417 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418}
4419
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004420static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421{
4422 sector_t sector;
4423 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004424 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004425
4426 sector = be64_to_cpu(p->sector);
4427 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004428
4429 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4430
4431 dec_rs_pending(mdev);
4432
4433 if (get_ldev_if_state(mdev, D_FAILED)) {
4434 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004435 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004436 case P_NEG_RS_DREPLY:
4437 drbd_rs_failed_io(mdev, sector, size);
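			/* fall through */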
4438 case P_RS_CANCEL:
4439 break;
4440 default:
4441 D_ASSERT(0);
4442 put_ldev(mdev);
4443 return false;
4444 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004445 put_ldev(mdev);
4446 }
4447
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004448 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449}
4450
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004451static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004453 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004454
4455 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4456
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004457 if (mdev->state.conn == C_AHEAD &&
4458 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004459 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4460 mdev->start_resync_timer.expires = jiffies + HZ;
4461 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004462 }
4463
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004464 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004465}
4466
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004467static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004469 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470 struct drbd_work *w;
4471 sector_t sector;
4472 int size;
4473
4474 sector = be64_to_cpu(p->sector);
4475 size = be32_to_cpu(p->blksize);
4476
4477 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4478
4479 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4480 drbd_ov_oos_found(mdev, sector, size);
4481 else
4482 ov_oos_print(mdev);
4483
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004484 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004485 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004486
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487 drbd_rs_complete_io(mdev, sector);
4488 dec_rs_pending(mdev);
4489
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004490 --mdev->ov_left;
4491
4492 /* let's advance progress step marks only for every other megabyte */
4493 if ((mdev->ov_left & 0x200) == 0x200)
4494 drbd_advance_rs_marks(mdev, mdev->ov_left);
4495
4496 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004497 w = kmalloc(sizeof(*w), GFP_NOIO);
4498 if (w) {
4499 w->cb = w_ov_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004500 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004501 } else {
4502 dev_err(DEV, "kmalloc(w) failed.");
4503 ov_oos_print(mdev);
4504 drbd_resync_finished(mdev);
4505 }
4506 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004507 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004508 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004509}
4510
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004511static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004512{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004513 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004514}
4515
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516struct asender_cmd {
4517 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004518 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519};
4520
4521static struct asender_cmd *get_asender_cmd(int cmd)
4522{
4523 static struct asender_cmd asender_tbl[] = {
4524 /* anything missing from this table is in
4525 * the drbd_cmd_handler (drbd_default_handler) table,
4526 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004527 [P_PING] = { sizeof(struct p_header), got_Ping },
4528 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004529 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4530 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4531 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4532 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4533 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4534 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4535 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4536 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4537 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4538 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4539 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004540 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004541 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542 [P_MAX_CMD] = { 0, NULL },
4543 };
4544 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4545 return NULL;
4546 return &asender_tbl[cmd];
4547}
4548
4549int drbd_asender(struct drbd_thread *thi)
4550{
4551 struct drbd_conf *mdev = thi->mdev;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004552 struct p_header *h = &mdev->tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004553 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004554 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555
Philipp Reisner257d0af2011-01-26 12:15:29 +01004556 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004557 void *buf = h;
4558 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004559 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004560 int ping_timeout_active = 0;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004561 int empty;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004562
4563 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4564
4565 current->policy = SCHED_RR; /* Make this a realtime task! */
4566 current->rt_priority = 2; /* more important than all other tasks */
4567
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004568 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004569 drbd_thread_current_set_cpu(thi);
Philipp Reisnere43ef192011-02-07 14:40:40 +01004570 if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) {
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004571 if (!drbd_send_ping(mdev)) {
4572 dev_err(DEV, "drbd_send_ping has failed\n");
4573 goto reconnect;
4574 }
Philipp Reisnere42325a2011-01-19 13:55:45 +01004575 mdev->tconn->meta.socket->sk->sk_rcvtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +01004576 mdev->tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004577 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578 }
4579
4580 /* conditionally cork;
4581 * it may hurt latency if we cork without much to send */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004582 if (!mdev->tconn->net_conf->no_cork &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583 3 < atomic_read(&mdev->unacked_cnt))
Philipp Reisnere42325a2011-01-19 13:55:45 +01004584 drbd_tcp_cork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585 while (1) {
Philipp Reisner808e37b2011-02-07 14:44:14 +01004586 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 flush_signals(current);
Lars Ellenberg0f8488e2010-10-13 18:19:23 +02004588 if (!drbd_process_done_ee(mdev))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004589 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004590 /* to avoid race with newly queued ACKs */
Philipp Reisner808e37b2011-02-07 14:44:14 +01004591 set_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004592 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004593 empty = list_empty(&mdev->done_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004594 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595 /* new ack may have been queued right here,
4596 * but then there is also a signal pending,
4597 * and we start over... */
4598 if (empty)
4599 break;
4600 }
4601 /* but unconditionally uncork unless disabled */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004602 if (!mdev->tconn->net_conf->no_cork)
Philipp Reisnere42325a2011-01-19 13:55:45 +01004603 drbd_tcp_uncork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604
4605 /* short circuit, recv_msg would return EINTR anyways. */
4606 if (signal_pending(current))
4607 continue;
4608
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +01004609 rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0);
Philipp Reisner808e37b2011-02-07 14:44:14 +01004610 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004611
4612 flush_signals(current);
4613
4614 /* Note:
4615 * -EINTR (on meta) we got a signal
4616 * -EAGAIN (on meta) rcvtimeo expired
4617 * -ECONNRESET other side closed the connection
4618 * -ERESTARTSYS (on data) we got a signal
4619 * rv < 0 other than above: unexpected error!
4620 * rv == expected: full header or command
4621 * rv < expected: "woken" by signal during receive
4622 * rv == 0 : "connection shut down by peer"
4623 */
4624 if (likely(rv > 0)) {
4625 received += rv;
4626 buf += rv;
4627 } else if (rv == 0) {
4628 dev_err(DEV, "meta connection shut down by peer.\n");
4629 goto reconnect;
4630 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004631 /* If the data socket received something meanwhile,
4632 * that is good enough: peer is still alive. */
Philipp Reisner31890f42011-01-19 14:12:51 +01004633 if (time_after(mdev->tconn->last_received,
Philipp Reisnere42325a2011-01-19 13:55:45 +01004634 jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004635 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004636 if (ping_timeout_active) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637 dev_err(DEV, "PingAck did not arrive in time.\n");
4638 goto reconnect;
4639 }
Philipp Reisnere43ef192011-02-07 14:40:40 +01004640 set_bit(SEND_PING, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004641 continue;
4642 } else if (rv == -EINTR) {
4643 continue;
4644 } else {
4645 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
4646 goto reconnect;
4647 }
4648
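		/* first pass: only the fixed header has arrived - decode it,
		 * pick the handler and grow 'expect' to the full packet size */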
4649 if (received == expect && cmd == NULL) {
Philipp Reisnerce243852011-02-07 17:27:47 +01004650 if (!decode_header(mdev->tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004651 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004652 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 if (unlikely(cmd == NULL)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004654 dev_err(DEV, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004655 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004656 goto disconnect;
4657 }
4658 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004659 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004660 dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004661 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004663 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 }
4665 if (received == expect) {
Philipp Reisner31890f42011-01-19 14:12:51 +01004666 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 D_ASSERT(cmd != NULL);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004668 if (!cmd->process(mdev, pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669 goto reconnect;
4670
Lars Ellenbergf36af182011-03-09 22:44:55 +01004671 /* the idle_timeout (ping-int)
4672 * has been restored in got_PingAck() */
4673 if (cmd == get_asender_cmd(P_PING_ACK))
4674 ping_timeout_active = 0;
4675
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676 buf = h;
4677 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004678 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679 cmd = NULL;
4680 }
4681 }
4682
4683 if (0) {
4684reconnect:
4685 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004686 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687 }
4688 if (0) {
4689disconnect:
4690 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004691 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 }
Philipp Reisner808e37b2011-02-07 14:44:14 +01004693 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694
4695 D_ASSERT(mdev->state.conn < C_CONNECTED);
4696 dev_info(DEV, "asender terminated\n");
4697
4698 return 0;
4699}