/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_handshake(struct drbd_conf *mdev);
static int drbd_do_auth(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
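
/*
 * Illustrative sketch (not used by the driver): building and walking such
 * a chain with the helpers above.  page_chain_next() comes from
 * drbd_int.h and GFP_TRY from this file; everything else is plain page
 * allocator API.
 *
 *	struct page *chain = NULL, *p;
 *	int n;
 *
 *	for (n = 0; n < 4; n++) {
 *		p = alloc_page(GFP_TRY);
 *		if (!p)
 *			break;
 *		set_page_private(p, (unsigned long)chain);	// link in front
 *		chain = p;
 *	}
 *	for (p = chain; p; p = page_chain_next(p))
 *		;	// ... look at each page ...
 *	page_chain_free(chain);		// put_page() on every element
 */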

static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_epoch_entry *e;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first unfinished one we can
	   stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		e = list_entry(le, struct drbd_epoch_entry, w.list);
		if (drbd_ee_has_active_page(e))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_epoch_entry *e, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(e, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, e);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
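
/*
 * Illustrative pairing (a sketch, not driver code; nr_pages is whatever
 * the caller needs): every chain obtained from drbd_pp_alloc() must go
 * back through drbd_pp_free() with a matching is_net flag, otherwise
 * pp_in_use/pp_in_use_by_net drift and the allocator above can end up
 * waiting forever.
 *
 *	struct page *chain;
 *
 *	chain = drbd_pp_alloc(mdev, nr_pages, true);	// may block
 *	if (!chain)
 *		return -EINTR;	// with retry=true, only a signal gets us here
 *	// ... hang the chain off an epoch entry, do the I/O ...
 *	drbd_pp_free(mdev, chain, 0);	// 0: accounted in pp_in_use
 */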

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
				       u64 id,
				       sector_t sector,
				       unsigned int data_size,
				       gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_epoch_entry *e;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!e) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&e->i);
	e->epoch = NULL;
	e->mdev = mdev;
	e->pages = page;
	atomic_set(&e->pending_bios, 0);
	e->i.size = data_size;
	e->flags = 0;
	e->i.sector = sector;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	e->block_id = id;

	return e;

 fail:
	mempool_free(e, drbd_ee_mempool);
	return NULL;
}
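
/*
 * Sketch of the usual epoch entry life cycle built from the helpers in
 * this file (simplified; see recv_resync_read() further down for the
 * complete error handling):
 *
 *	e = drbd_alloc_ee(mdev, ID_SYNCER, sector, data_size, GFP_NOIO);
 *	if (!e)
 *		goto fail;
 *	e->w.cb = e_end_resync_block;		// completion callback
 *	spin_lock_irq(&mdev->tconn->req_lock);
 *	list_add(&e->w.list, &mdev->sync_ee);
 *	spin_unlock_irq(&mdev->tconn->req_lock);
 *	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) != 0) {
 *		list_del(&e->w.list);		// under req_lock again
 *		drbd_free_ee(mdev, e);
 *	}
 */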

void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
{
	if (e->flags & EE_HAS_DIGEST)
		kfree(e->digest);
	drbd_pp_free(mdev, e->pages, is_net);
	D_ASSERT(atomic_read(&e->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&e->i));
	mempool_free(e, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_epoch_entry *e, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(e, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, e, is_net);
		count++;
	}
	return count;
}


/*
 * This function is called from _asender only_
 * but see also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_epoch_entry *e, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(e, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, e);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(e, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = e->w.cb(mdev, &e->w, !ok) && ok;
		drbd_free_ee(mdev, e);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}

void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
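
/*
 * Locking convention sketch, restating the req_lock table further up in
 * this file: the underscore variant expects the caller to hold req_lock,
 * the plain variant takes and releases it itself.
 *
 *	spin_lock_irq(&mdev->tconn->req_lock);
 *	// ... manipulate mdev->active_ee under the lock ...
 *	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);	// lock held
 *	spin_unlock_irq(&mdev->tconn->req_lock);
 *
 *	drbd_wait_ee_list_empty(mdev, &mdev->done_ee);		// lock not held
 */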

/* see also kernel_accept, which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(struct drbd_conf *mdev, const char **what,
		struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;

out:
	return err;
}

static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
		    void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				dev_info(DEV, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				dev_err(DEV, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			dev_info(DEV, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));

	return rv;
}
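
/*
 * Usage sketch (h is a hypothetical caller-local struct p_header):
 * drbd_recv() returns the requested size only if the full buffer
 * arrived, so callers compare the return value against that size and
 * treat anything else as a dead connection -- drbd_recv() has already
 * forced the state to C_BROKEN_PIPE in that case.
 *
 *	struct p_header h;
 *
 *	if (drbd_recv(mdev, &h, sizeof(h)) != sizeof(h))
 *		return false;
 */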

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
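
/*
 * Call-order sketch (mirrors drbd_try_connect()/drbd_wait_for_connect()
 * below; net_conf-> stands in for mdev->tconn->net_conf->): per the
 * tcp(7) quote above, the buffer sizes are applied right after the
 * socket is created, before bind()/connect() respectively listen().
 *
 *	err = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
 *	if (err < 0)
 *		return NULL;
 *	drbd_setbufsize(sock, net_conf->sndbuf_size, net_conf->rcvbuf_size);
 *	err = sock->ops->connect(sock,
 *				 (struct sockaddr *)net_conf->peer_addr,
 *				 net_conf->peer_addr_len, 0);
 */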

static struct socket *drbd_try_connect(struct drbd_conf *mdev)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(mdev->tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = mdev->tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, mdev->tconn->net_conf->sndbuf_size,
			mdev->tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 *  for the outgoing connections.
	 *  This is needed for multihomed hosts and to be
	 *  able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	memcpy(&src_in6, mdev->tconn->net_conf->my_addr,
	       min_t(int, mdev->tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      mdev->tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)mdev->tconn->net_conf->peer_addr,
				 mdev->tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			dev_err(DEV, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(mdev->tconn);
	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(mdev->tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = mdev->tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, mdev->tconn->net_conf->sndbuf_size,
			mdev->tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) mdev->tconn->net_conf->my_addr,
			      mdev->tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(mdev, &what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			dev_err(DEV, "%s failed, err = %d\n", what, err);
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		}
	}
	put_net_conf(mdev->tconn);

	return s_estab;
}

static int drbd_send_fp(struct drbd_conf *mdev,
	struct socket *sock, enum drbd_packets cmd)
{
	struct p_header *h = &mdev->tconn->data.sbuf.header;

	return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
}

static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
{
	struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @mdev:	DRBD device.
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_conf *mdev)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	D_ASSERT(!mdev->tconn->data.socket);

	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &mdev->flags);
	mdev->tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(mdev);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock) {
				drbd_send_fp(mdev, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(mdev, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				dev_err(DEV, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(mdev, &sock);
			ok = drbd_socket_okay(mdev, &msock) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(mdev);
		if (s) {
			try = drbd_recv_fp(mdev, s);
			drbd_socket_okay(mdev, &sock);
			drbd_socket_okay(mdev, &msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					dev_warn(DEV, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					dev_warn(DEV, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &mdev->flags);
				break;
			default:
				dev_warn(DEV, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (mdev->state.conn <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&mdev->tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(mdev, &sock);
			ok = drbd_socket_okay(mdev, &msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	mdev->tconn->data.socket = sock;
	mdev->tconn->meta.socket = msock;
	mdev->tconn->last_received = jiffies;

	D_ASSERT(mdev->tconn->asender.task == NULL);

	h = drbd_do_handshake(mdev);
	if (h <= 0)
		return h;

	if (mdev->tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(mdev)) {
		case -1:
			dev_err(DEV, "Authentication of peer failed\n");
			return -1;
		case 0:
			dev_err(DEV, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	drbd_thread_start(&mdev->tconn->asender);

	if (drbd_send_protocol(mdev) == -1)
		return -1;
	drbd_send_sync_param(mdev, &mdev->sync_conf);
	drbd_send_sizes(mdev, 0, 0);
	drbd_send_uuids(mdev);
	drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */

	return 1;

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}

static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size)
{
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	int r;

	r = drbd_recv(mdev, h, sizeof(*h));
	if (unlikely(r != sizeof(*h))) {
		if (!signal_pending(current))
			dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
		return false;
	}

	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		*cmd = be16_to_cpu(h->h80.command);
		*packet_size = be16_to_cpu(h->h80.length);
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		*cmd = be16_to_cpu(h->h95.command);
		*packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff;
	} else {
		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
		    be32_to_cpu(h->h80.magic),
		    be16_to_cpu(h->h80.command),
		    be16_to_cpu(h->h80.length));
		return false;
	}
	mdev->tconn->last_received = jiffies;

	return true;
}
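
/*
 * Decoding sketch for the two on-the-wire header layouts accepted above
 * (field widths as implied by the endianness conversions used; see
 * drbd_int.h for the authoritative struct definitions):
 *
 *	h80: be32 magic (DRBD_MAGIC),     be16 command, be16 length
 *	h95: be16 magic (DRBD_MAGIC_BIG), be16 command, be32 length,
 *	     of which only the low 24 bits (0x00ffffff) are the payload size
 *
 * A typical caller would look like this (sketch):
 *
 *	enum drbd_packets cmd;
 *	unsigned int size;
 *
 *	if (!drbd_recv_header(mdev, &cmd, &size))
 *		return false;	// drbd_recv() already forced C_BROKEN_PIPE
 *	// "size" bytes of payload still have to be drbd_recv()ed
 */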

static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}

/**
 * drbd_submit_ee()
 * @mdev:	DRBD device.
 * @e:		epoch entry
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
		const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = e->pages;
	sector_t sector = e->i.sector;
	unsigned ds = e->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > e->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = e;
	bio->bi_end_io = drbd_endio_sec;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&e->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
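
/*
 * Caller-side sketch (this is what recv_resync_read() below does): a
 * non-zero return means nothing was submitted, so the entry has to be
 * taken back off its list and freed, and the connection is dropped.
 *
 *	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
 *		return true;
 *	// don't care whether it was -ENOMEM or -ENOSPC here
 *	spin_lock_irq(&mdev->tconn->req_lock);
 *	list_del(&e->w.list);
 *	spin_unlock_irq(&mdev->tconn->req_lock);
 *	drbd_free_ee(mdev, e);
 */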
1165
Philipp Reisner02918be2010-08-20 14:35:10 +02001166static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001167{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001168 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001169 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170 struct drbd_epoch *epoch;
1171
Philipp Reisnerb411b362009-09-25 16:07:19 -07001172 inc_unacked(mdev);
1173
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174 mdev->current_epoch->barrier_nr = p->barrier;
1175 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1176
1177 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1178 * the activity log, which means it would not be resynced in case the
1179 * R_PRIMARY crashes now.
1180 * Therefore we must send the barrier_ack after the barrier request was
1181 * completed. */
1182 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183 case WO_none:
1184 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001185 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001186
1187 /* receiver context, in the writeout path of the other node.
1188 * avoid potential distributed deadlock */
1189 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1190 if (epoch)
1191 break;
1192 else
1193 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1194 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195
1196 case WO_bdev_flush:
1197 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001199 drbd_flush(mdev);
1200
1201 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1202 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1203 if (epoch)
1204 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 }
1206
Philipp Reisner2451fc32010-08-24 13:43:11 +02001207 epoch = mdev->current_epoch;
1208 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1209
1210 D_ASSERT(atomic_read(&epoch->active) == 0);
1211 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001213 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001214 default:
1215 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001216 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217 }
1218
1219 epoch->flags = 0;
1220 atomic_set(&epoch->epoch_size, 0);
1221 atomic_set(&epoch->active, 0);
1222
1223 spin_lock(&mdev->epoch_lock);
1224 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1225 list_add(&epoch->list, &mdev->current_epoch->list);
1226 mdev->current_epoch = epoch;
1227 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 } else {
1229 /* The current_epoch got recycled while we allocated this one... */
1230 kfree(epoch);
1231 }
1232 spin_unlock(&mdev->epoch_lock);
1233
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001234 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235}
1236
1237/* used from receive_RSDataReply (recv_resync_read)
1238 * and from receive_Data */
1239static struct drbd_epoch_entry *
1240read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local)
1241{
Lars Ellenberg66660322010-04-06 12:15:04 +02001242 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 struct drbd_epoch_entry *e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001245 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001246 void *dig_in = mdev->tconn->int_dig_in;
1247 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001248 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001249
Philipp Reisnera0638452011-01-19 14:31:32 +01001250 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1251 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252
1253 if (dgs) {
1254 rr = drbd_recv(mdev, dig_in, dgs);
1255 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001256 if (!signal_pending(current))
1257 dev_warn(DEV,
1258 "short read receiving data digest: read %d expected %d\n",
1259 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260 return NULL;
1261 }
1262 }
1263
1264 data_size -= dgs;
1265
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001266 if (!expect(data_size != 0))
1267 return NULL;
1268 if (!expect(IS_ALIGNED(data_size, 512)))
1269 return NULL;
1270 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1271 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272
Lars Ellenberg66660322010-04-06 12:15:04 +02001273 /* even though we trust out peer,
1274 * we sometimes have to double check. */
1275 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001276 dev_err(DEV, "request from peer beyond end of local disk: "
1277 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001278 (unsigned long long)capacity,
1279 (unsigned long long)sector, data_size);
1280 return NULL;
1281 }
1282
Philipp Reisnerb411b362009-09-25 16:07:19 -07001283 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1284 * "criss-cross" setup, that might cause write-out on some other DRBD,
1285 * which in turn might block on the other node at this very place. */
1286 e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1287 if (!e)
1288 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001289
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290 ds = data_size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001291 page = e->pages;
1292 page_chain_for_each(page) {
1293 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001294 data = kmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001295 rr = drbd_recv(mdev, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001296 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001297 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1298 data[0] = data[0] ^ (unsigned long)-1;
1299 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001301 if (rr != len) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001302 drbd_free_ee(mdev, e);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001303 if (!signal_pending(current))
1304 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1305 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001306 return NULL;
1307 }
1308 ds -= rr;
1309 }
1310
1311 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001312 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, e, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001314 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1315 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316 drbd_bcast_ee(mdev, "digest failed",
1317 dgs, dig_in, dig_vv, e);
1318 drbd_free_ee(mdev, e);
1319 return NULL;
1320 }
1321 }
1322 mdev->recv_cnt += data_size>>9;
1323 return e;
1324}
1325
1326/* drbd_drain_block() just takes a data block
1327 * out of the socket input buffer, and discards it.
1328 */
1329static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1330{
1331 struct page *page;
1332 int rr, rv = 1;
1333 void *data;
1334
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001335 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001336 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001337
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001339
1340 data = kmap(page);
1341 while (data_size) {
1342 rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
1343 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1344 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001345 if (!signal_pending(current))
1346 dev_warn(DEV,
1347 "short read receiving data: read %d expected %d\n",
1348 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001349 break;
1350 }
1351 data_size -= rr;
1352 }
1353 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001354 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 return rv;
1356}
1357
1358static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1359 sector_t sector, int data_size)
1360{
1361 struct bio_vec *bvec;
1362 struct bio *bio;
1363 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001364 void *dig_in = mdev->tconn->int_dig_in;
1365 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001366
Philipp Reisnera0638452011-01-19 14:31:32 +01001367 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1368 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369
1370 if (dgs) {
1371 rr = drbd_recv(mdev, dig_in, dgs);
1372 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001373 if (!signal_pending(current))
1374 dev_warn(DEV,
1375 "short read receiving data reply digest: read %d expected %d\n",
1376 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001377 return 0;
1378 }
1379 }
1380
1381 data_size -= dgs;
1382
1383 /* optimistically update recv_cnt. if receiving fails below,
1384 * we disconnect anyways, and counters will be reset. */
1385 mdev->recv_cnt += data_size>>9;
1386
1387 bio = req->master_bio;
1388 D_ASSERT(sector == bio->bi_sector);
1389
1390 bio_for_each_segment(bvec, bio, i) {
1391 expect = min_t(int, data_size, bvec->bv_len);
1392 rr = drbd_recv(mdev,
1393 kmap(bvec->bv_page)+bvec->bv_offset,
1394 expect);
1395 kunmap(bvec->bv_page);
1396 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001397 if (!signal_pending(current))
1398 dev_warn(DEV, "short read receiving data reply: "
1399 "read %d expected %d\n",
1400 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401 return 0;
1402 }
1403 data_size -= rr;
1404 }
1405
1406 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001407 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408 if (memcmp(dig_in, dig_vv, dgs)) {
1409 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1410 return 0;
1411 }
1412 }
1413
1414 D_ASSERT(data_size == 0);
1415 return 1;
1416}
1417
1418/* e_end_resync_block() is called via
1419 * drbd_process_done_ee() by asender only */
1420static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1421{
1422 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001423 sector_t sector = e->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001424 int ok;
1425
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001426 D_ASSERT(drbd_interval_empty(&e->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001427
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001428 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001429 drbd_set_in_sync(mdev, sector, e->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001430 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e);
1431 } else {
1432 /* Record failure to sync */
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001433 drbd_rs_failed_io(mdev, sector, e->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001434
1435 ok = drbd_send_ack(mdev, P_NEG_ACK, e);
1436 }
1437 dec_unacked(mdev);
1438
1439 return ok;
1440}
1441
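/* A descriptive summary of the function below: read an incoming resync data
 * block into a newly allocated epoch entry and submit it as a local write;
 * the ack (P_RS_WRITE_ACK or P_NEG_ACK) is sent later from
 * e_end_resync_block() once the local IO has completed. */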
1442static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1443{
1444 struct drbd_epoch_entry *e;
1445
1446 e = read_in_block(mdev, ID_SYNCER, sector, data_size);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001447 if (!e)
1448 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449
1450 dec_rs_pending(mdev);
1451
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 inc_unacked(mdev);
1453 /* corresponding dec_unacked() in e_end_resync_block()
1454 * respective _drbd_clear_done_ee */
1455
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001456 e->w.cb = e_end_resync_block;
1457
Philipp Reisner87eeee42011-01-19 14:16:30 +01001458 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459 list_add(&e->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001460 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001462 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001463 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001464 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001466 /* don't care for the reason here */
1467 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001468 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001469 list_del(&e->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001470 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001471
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001472 drbd_free_ee(mdev, e);
1473fail:
1474 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001475 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476}
1477
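/* A descriptive summary of the function below: resolve the request a peer
 * packet refers to.  The block_id on the wire is the kernel address of our
 * own request object, so cast it back and verify via the interval tree that
 * it really covers the given sector before trusting it. */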
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001478static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001479find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1480 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001481{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001482 struct drbd_request *req;
1483
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001484 /* Request object according to our peer */
1485 req = (struct drbd_request *)(unsigned long)id;
1486 if (drbd_contains_interval(root, sector, &req->i))
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001487 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001488 if (!missing_ok) {
1489 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1490 (unsigned long)id, (unsigned long long)sector);
1491 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001492 return NULL;
1493}
1494
Philipp Reisner02918be2010-08-20 14:35:10 +02001495static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001496{
1497 struct drbd_request *req;
1498 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001500 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501
1502 sector = be64_to_cpu(p->sector);
1503
Philipp Reisner87eeee42011-01-19 14:16:30 +01001504 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001505 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001506 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001507 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001508 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001509
Bart Van Assche24c48302011-05-21 18:32:29 +02001510 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511 * special casing it there for the various failure cases.
1512 * still no race with drbd_fail_pending_reads */
1513 ok = recv_dless_read(mdev, req, sector, data_size);
1514
1515 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001516 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517 /* else: nothing. handled from drbd_disconnect...
1518 * I don't think we may complete this just yet
1519 * in case we are "on-disconnect: freeze" */
1520
1521 return ok;
1522}
1523
Philipp Reisner02918be2010-08-20 14:35:10 +02001524static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525{
1526 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001528 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001529
1530 sector = be64_to_cpu(p->sector);
1531 D_ASSERT(p->block_id == ID_SYNCER);
1532
1533 if (get_ldev(mdev)) {
1534 /* data is submitted to disk within recv_resync_read.
1535 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001536 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 ok = recv_resync_read(mdev, sector, data_size);
1538 } else {
1539 if (__ratelimit(&drbd_ratelimit_state))
1540 dev_err(DEV, "Can not write resync data to local disk.\n");
1541
1542 ok = drbd_drain_block(mdev, data_size);
1543
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001544 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545 }
1546
Philipp Reisner778f2712010-07-06 11:14:00 +02001547 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1548
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 return ok;
1550}
1551
1552/* e_end_block() is called via drbd_process_done_ee().
1553 * this means this function only runs in the asender thread
1554 */
1555static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1556{
1557 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001558 sector_t sector = e->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 int ok = 1, pcmd;
1560
Philipp Reisner89e58e72011-01-19 13:12:45 +01001561 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001562 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1564 mdev->state.conn <= C_PAUSED_SYNC_T &&
1565 e->flags & EE_MAY_SET_IN_SYNC) ?
1566 P_RS_WRITE_ACK : P_WRITE_ACK;
1567 ok &= drbd_send_ack(mdev, pcmd, e);
1568 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001569 drbd_set_in_sync(mdev, sector, e->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570 } else {
1571 ok = drbd_send_ack(mdev, P_NEG_ACK, e);
1572 /* we expect it to be marked out of sync anyways...
1573 * maybe assert this? */
1574 }
1575 dec_unacked(mdev);
1576 }
1577 /* we delete from the conflict detection hash _after_ we sent out the
1578 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001579 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001580 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001581 D_ASSERT(!drbd_interval_empty(&e->i));
1582 drbd_remove_interval(&mdev->epoch_entries, &e->i);
1583 drbd_clear_interval(&e->i);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001584 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001585 } else
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001586 D_ASSERT(drbd_interval_empty(&e->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587
1588 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
1589
1590 return ok;
1591}
1592
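/* A descriptive summary of the function below: used instead of e_end_block()
 * when concurrent-write arbitration decided to keep our local write; tell the
 * peer to discard its conflicting write with P_DISCARD_ACK and drop our
 * interval bookkeeping for the epoch entry. */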
1593static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1594{
1595 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1596 int ok = 1;
1597
Philipp Reisner89e58e72011-01-19 13:12:45 +01001598 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001599 ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
1600
Philipp Reisner87eeee42011-01-19 14:16:30 +01001601 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001602 D_ASSERT(!drbd_interval_empty(&e->i));
1603 drbd_remove_interval(&mdev->epoch_entries, &e->i);
1604 drbd_clear_interval(&e->i);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001605 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001606
1607 dec_unacked(mdev);
1608
1609 return ok;
1610}
1611
1612/* Called from receive_Data.
1613 * Synchronize packets on sock with packets on msock.
1614 *
1615 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1616 * packet traveling on msock, they are still processed in the order they have
1617 * been sent.
1618 *
1619 * Note: we don't care for Ack packets overtaking P_DATA packets.
1620 *
1621 * In case packet_seq is larger than mdev->peer_seq number, there are
1622 * outstanding packets on the msock. We wait for them to arrive.
1623 * In case we are the logically next packet, we update mdev->peer_seq
1624 * ourselves. Correctly handles 32bit wrap around.
1625 *
1626 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1627 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1628 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1629 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1630 *
1631 * returns 0 if we may process the packet,
1632 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1633static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1634{
1635 DEFINE_WAIT(wait);
1636 unsigned int p_seq;
1637 long timeout;
1638 int ret = 0;
1639 spin_lock(&mdev->peer_seq_lock);
1640 for (;;) {
1641 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
1642 if (seq_le(packet_seq, mdev->peer_seq+1))
1643 break;
1644 if (signal_pending(current)) {
1645 ret = -ERESTARTSYS;
1646 break;
1647 }
1648 p_seq = mdev->peer_seq;
1649 spin_unlock(&mdev->peer_seq_lock);
1650 timeout = schedule_timeout(30*HZ);
1651 spin_lock(&mdev->peer_seq_lock);
1652 if (timeout == 0 && p_seq == mdev->peer_seq) {
1653 ret = -ETIMEDOUT;
1654 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1655 break;
1656 }
1657 }
1658 finish_wait(&mdev->seq_wait, &wait);
1659 if (mdev->peer_seq+1 == packet_seq)
1660 mdev->peer_seq++;
1661 spin_unlock(&mdev->peer_seq_lock);
1662 return ret;
1663}
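
/* A minimal illustration of a wrap-around-safe "less than or equal" on 32-bit
 * sequence numbers; seq_le() as used above is assumed to follow this pattern.
 * Casting the unsigned difference to signed gives the right answer as long as
 * the two sequence numbers are less than 2^31 apart. */
static inline int seq_le_sketch(u32 a, u32 b)
{
	/* e.g. seq_le_sketch(0xfffffffe, 2) == 1, seq_le_sketch(2, 0xfffffffe) == 0 */
	return (s32)(a - b) <= 0;
}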
1664
Lars Ellenberg688593c2010-11-17 22:25:03 +01001665/* see also bio_flags_to_wire()
1666 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1667 * flags and back. We may replicate to other kernel versions. */
1668static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001669{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001670 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1671 (dpf & DP_FUA ? REQ_FUA : 0) |
1672 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1673 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001674}
1675
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676/* mirrored write */
Philipp Reisner02918be2010-08-20 14:35:10 +02001677static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678{
1679 sector_t sector;
1680 struct drbd_epoch_entry *e;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001681 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 int rw = WRITE;
1683 u32 dp_flags;
1684
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686 spin_lock(&mdev->peer_seq_lock);
1687 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1688 mdev->peer_seq++;
1689 spin_unlock(&mdev->peer_seq_lock);
1690
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001691 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692 atomic_inc(&mdev->current_epoch->epoch_size);
1693 return drbd_drain_block(mdev, data_size);
1694 }
1695
1696 /* get_ldev(mdev) successful.
1697 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001698 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699 * the end of this function. */
1700
1701 sector = be64_to_cpu(p->sector);
1702 e = read_in_block(mdev, p->block_id, sector, data_size);
1703 if (!e) {
1704 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001705 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706 }
1707
Philipp Reisnerb411b362009-09-25 16:07:19 -07001708 e->w.cb = e_end_block;
1709
Lars Ellenberg688593c2010-11-17 22:25:03 +01001710 dp_flags = be32_to_cpu(p->dp_flags);
1711 rw |= wire_flags_to_bio(mdev, dp_flags);
1712
1713 if (dp_flags & DP_MAY_SET_IN_SYNC)
1714 e->flags |= EE_MAY_SET_IN_SYNC;
1715
Philipp Reisnerb411b362009-09-25 16:07:19 -07001716 spin_lock(&mdev->epoch_lock);
1717 e->epoch = mdev->current_epoch;
1718 atomic_inc(&e->epoch->epoch_size);
1719 atomic_inc(&e->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001720 spin_unlock(&mdev->epoch_lock);
1721
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001723 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001724 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001725 } else {
1726 /* don't get the req_lock yet,
1727 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001728 const int size = e->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
1730 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731 int first;
1732
Philipp Reisner89e58e72011-01-19 13:12:45 +01001733 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734
1735 /* conflict detection and handling:
1736 * 1. wait on the sequence number,
1737 * in case this data packet overtook ACK packets.
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001738 * 2. check our interval trees for conflicting requests:
1739 * we only need to check the write_requests tree; the
1740 * epoch_entries tree cannot contain any overlaps because
1741 * they were already eliminated on the submitting node.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001742 *
1743 * Note: for two_primaries, we are protocol C,
1744 * so there cannot be any request that is DONE
1745 * but still on the transfer log.
1746 *
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001747 * unconditionally add to the epoch_entries tree.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001748 *
1749 * if no conflicting request is found:
1750 * submit.
1751 *
1752 * if any conflicting request is found
1753 * that has not yet been acked,
1754 * AND I have the "discard concurrent writes" flag:
1755 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1756 *
1757 * if any conflicting request is found:
1758 * block the receiver, waiting on misc_wait
1759 * until no more conflicting requests are there,
1760 * or we get interrupted (disconnect).
1761 *
1762 * we do not just write after local io completion of those
1763 * requests, but only after req is done completely, i.e.
1764 * we wait for the P_DISCARD_ACK to arrive!
1765 *
1766 * then proceed normally, i.e. submit.
1767 */
1768 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1769 goto out_interrupted;
1770
Philipp Reisner87eeee42011-01-19 14:16:30 +01001771 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001773 drbd_insert_interval(&mdev->epoch_entries, &e->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775 first = 1;
1776 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001777 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778 int have_unacked = 0;
1779 int have_conflict = 0;
1780 prepare_to_wait(&mdev->misc_wait, &wait,
1781 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001782
1783 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1784 if (i) {
1785 struct drbd_request *req2 =
1786 container_of(i, struct drbd_request, i);
1787
1788 /* only ALERT on first iteration,
1789 * we may be woken up early... */
1790 if (first)
1791 dev_alert(DEV, "%s[%u] Concurrent local write detected!"
1792 " new: %llus +%u; pending: %llus +%u\n",
1793 current->comm, current->pid,
1794 (unsigned long long)sector, size,
1795 (unsigned long long)req2->i.sector, req2->i.size);
1796 if (req2->rq_state & RQ_NET_PENDING)
1797 ++have_unacked;
1798 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800 if (!have_conflict)
1801 break;
1802
1803 /* Discard Ack only for the _first_ iteration */
1804 if (first && discard && have_unacked) {
1805 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1806 (unsigned long long)sector);
1807 inc_unacked(mdev);
1808 e->w.cb = e_send_discard_ack;
1809 list_add_tail(&e->w.list, &mdev->done_ee);
1810
Philipp Reisner87eeee42011-01-19 14:16:30 +01001811 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812
1813 /* we could probably send that P_DISCARD_ACK ourselves,
1814 * but I don't like the receiver using the msock */
1815
1816 put_ldev(mdev);
1817 wake_asender(mdev);
1818 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001819 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 }
1821
1822 if (signal_pending(current)) {
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001823 drbd_remove_interval(&mdev->epoch_entries, &e->i);
1824 drbd_clear_interval(&e->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825
Philipp Reisner87eeee42011-01-19 14:16:30 +01001826 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827
1828 finish_wait(&mdev->misc_wait, &wait);
1829 goto out_interrupted;
1830 }
1831
Philipp Reisner87eeee42011-01-19 14:16:30 +01001832 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833 if (first) {
1834 first = 0;
1835 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1836 "sec=%llus\n", (unsigned long long)sector);
1837 } else if (discard) {
1838 /* we had none on the first iteration.
1839 * there must be none now. */
1840 D_ASSERT(have_unacked == 0);
1841 }
1842 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001843 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001844 }
1845 finish_wait(&mdev->misc_wait, &wait);
1846 }
1847
1848 list_add(&e->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001849 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850
Philipp Reisner89e58e72011-01-19 13:12:45 +01001851 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852 case DRBD_PROT_C:
1853 inc_unacked(mdev);
1854 /* corresponding dec_unacked() in e_end_block()
1855 * respective _drbd_clear_done_ee */
1856 break;
1857 case DRBD_PROT_B:
1858 /* I really don't like it that the receiver thread
1859 * sends on the msock, but anyways */
1860 drbd_send_ack(mdev, P_RECV_ACK, e);
1861 break;
1862 case DRBD_PROT_A:
1863 /* nothing to do */
1864 break;
1865 }
1866
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001867 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868 /* In case we have the only disk of the cluster, */
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001869 drbd_set_out_of_sync(mdev, e->i.sector, e->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001870 e->flags |= EE_CALL_AL_COMPLETE_IO;
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001871 e->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001872 drbd_al_begin_io(mdev, e->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873 }
1874
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001875 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001876 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001877
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001878 /* don't care for the reason here */
1879 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001880 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001881 list_del(&e->w.list);
Andreas Gruenbacher8b946252011-01-20 15:23:07 +01001882 drbd_remove_interval(&mdev->epoch_entries, &e->i);
1883 drbd_clear_interval(&e->i);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001884 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001885 if (e->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacher010f6e62011-01-14 20:59:35 +01001886 drbd_al_complete_io(mdev, e->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001887
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888out_interrupted:
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001889 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001890 put_ldev(mdev);
1891 drbd_free_ee(mdev, e);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001892 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001893}
1894
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001895/* We may throttle resync, if the lower device seems to be busy,
1896 * and current sync rate is above c_min_rate.
1897 *
1898 * To decide whether or not the lower device is busy, we use a scheme similar
1899 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1900 * (more than 64 sectors) of activity we cannot account for with our own resync
1901 * activity, it obviously is "busy".
1902 *
1903 * The current sync rate used here is based only on the most recent two step marks,
1904 * to have a short time average so we can react faster.
1905 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001906int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001907{
1908 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1909 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001910 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001911 int curr_events;
1912 int throttle = 0;
1913
1914 /* feature disabled? */
1915 if (mdev->sync_conf.c_min_rate == 0)
1916 return 0;
1917
Philipp Reisnere3555d82010-11-07 15:56:29 +01001918 spin_lock_irq(&mdev->al_lock);
1919 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1920 if (tmp) {
1921 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1922 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1923 spin_unlock_irq(&mdev->al_lock);
1924 return 0;
1925 }
1926 /* Do not slow down if app IO is already waiting for this extent */
1927 }
1928 spin_unlock_irq(&mdev->al_lock);
1929
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001930 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1931 (int)part_stat_read(&disk->part0, sectors[1]) -
1932 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001933
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001934 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1935 unsigned long rs_left;
1936 int i;
1937
1938 mdev->rs_last_events = curr_events;
1939
1940 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1941 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001942 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1943
1944 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
1945 rs_left = mdev->ov_left;
1946 else
1947 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001948
1949 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
1950 if (!dt)
1951 dt++;
1952 db = mdev->rs_mark_left[i] - rs_left;
1953 dbdt = Bit2KB(db/dt);
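		/* dbdt is the recent resync rate in KiB/s, assuming the usual
		 * 4 KiB of data per bitmap bit; e.g. clearing 2048 bits within
		 * 4 seconds yields dbdt == 2048, which is then compared against
		 * the configured c_min_rate below. */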
1954
1955 if (dbdt > mdev->sync_conf.c_min_rate)
1956 throttle = 1;
1957 }
1958 return throttle;
1959}
1960
1961
Philipp Reisner02918be2010-08-20 14:35:10 +02001962static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963{
1964 sector_t sector;
1965 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1966 struct drbd_epoch_entry *e;
1967 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02001968 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001969 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001970 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971
1972 sector = be64_to_cpu(p->sector);
1973 size = be32_to_cpu(p->blksize);
1974
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01001975 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001976 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
1977 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001978 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001979 }
1980 if (sector + (size>>9) > capacity) {
1981 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
1982 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001983 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001984 }
1985
1986 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02001987 verb = 1;
1988 switch (cmd) {
1989 case P_DATA_REQUEST:
1990 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
1991 break;
1992 case P_RS_DATA_REQUEST:
1993 case P_CSUM_RS_REQUEST:
1994 case P_OV_REQUEST:
1995 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
1996 break;
1997 case P_OV_REPLY:
1998 verb = 0;
1999 dec_rs_pending(mdev);
2000 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2001 break;
2002 default:
2003 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2004 cmdname(cmd));
2005 }
2006 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002007 dev_err(DEV, "Can not satisfy peer's read request, "
2008 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002009
Lars Ellenberga821cc42010-09-06 12:31:37 +02002010 	/* drain possible payload */
2011 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012 }
2013
2014 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2015 * "criss-cross" setup, that might cause write-out on some other DRBD,
2016 * which in turn might block on the other node at this very place. */
2017 e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2018 if (!e) {
2019 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002020 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 }
2022
Philipp Reisner02918be2010-08-20 14:35:10 +02002023 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 case P_DATA_REQUEST:
2025 e->w.cb = w_e_end_data_req;
2026 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002027 /* application IO, don't drbd_rs_begin_io */
2028 goto submit;
2029
Philipp Reisnerb411b362009-09-25 16:07:19 -07002030 case P_RS_DATA_REQUEST:
2031 e->w.cb = w_e_end_rsdata_req;
2032 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002033 /* used in the sector offset progress display */
2034 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 break;
2036
2037 case P_OV_REPLY:
2038 case P_CSUM_RS_REQUEST:
2039 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2041 if (!di)
2042 goto out_free_e;
2043
2044 di->digest_size = digest_size;
2045 di->digest = (((char *)di)+sizeof(struct digest_info));
2046
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002047 e->digest = di;
2048 e->flags |= EE_HAS_DIGEST;
2049
Philipp Reisnerb411b362009-09-25 16:07:19 -07002050 if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
2051 goto out_free_e;
2052
Philipp Reisner02918be2010-08-20 14:35:10 +02002053 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002054 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002055 e->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002056 /* used in the sector offset progress display */
2057 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002058 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002059 /* track progress, we may need to throttle */
2060 atomic_add(size >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061 e->w.cb = w_e_end_ov_reply;
2062 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002063 /* drbd_rs_begin_io done when we sent this request,
2064 * but accounting still needs to be done. */
2065 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 }
2067 break;
2068
2069 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002070 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002071 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002072 unsigned long now = jiffies;
2073 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002074 mdev->ov_start_sector = sector;
2075 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002076 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2077 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002078 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2079 mdev->rs_mark_left[i] = mdev->ov_left;
2080 mdev->rs_mark_time[i] = now;
2081 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 dev_info(DEV, "Online Verify start sector: %llu\n",
2083 (unsigned long long)sector);
2084 }
2085 e->w.cb = w_e_end_ov_req;
2086 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 break;
2088
Philipp Reisnerb411b362009-09-25 16:07:19 -07002089 default:
2090 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002091 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002092 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002093 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094 }
2095
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002096 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2097 * wrt the receiver, but it is not as straightforward as it may seem.
2098 * Various places in the resync start and stop logic assume resync
2099 * requests are processed in order, requeuing this on the worker thread
2100 * introduces a bunch of new code for synchronization between threads.
2101 *
2102 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2103 * "forever", throttling after drbd_rs_begin_io will lock that extent
2104 * for application writes for the same time. For now, just throttle
2105 * here, where the rest of the code expects the receiver to sleep for
2106 * a while, anyways.
2107 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002108
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002109 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2110 * this defers syncer requests for some time, before letting at least
2111 * one request through. The resync controller on the receiving side
2112 * will adapt to the incoming rate accordingly.
2113 *
2114 * We cannot throttle here if remote is Primary/SyncTarget:
2115 * we would also throttle its application reads.
2116 * In that case, throttling is done on the SyncTarget only.
2117 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002118 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2119 schedule_timeout_uninterruptible(HZ/10);
2120 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002121 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002123submit_for_resync:
2124 atomic_add(size >> 9, &mdev->rs_sect_ev);
2125
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002126submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002127 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002128 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002129 list_add_tail(&e->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002130 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002131
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002132 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002133 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002135 /* don't care for the reason here */
2136 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002137 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002138 list_del(&e->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002139 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002140 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2141
Philipp Reisnerb411b362009-09-25 16:07:19 -07002142out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143 put_ldev(mdev);
2144 drbd_free_ee(mdev, e);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002145 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002146}
2147
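/* After-split-brain recovery policy for the "zero primaries" case.  Following
 * the hg convention of drbd_sync_handshake() below: returns 1 if this node
 * should become sync source (the peer discards its changes), -1 if it should
 * become sync target (we discard ours), and -100 if the configured policy
 * cannot reach a decision. */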
2148static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2149{
2150 int self, peer, rv = -100;
2151 unsigned long ch_self, ch_peer;
2152
2153 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2154 peer = mdev->p_uuid[UI_BITMAP] & 1;
2155
2156 ch_peer = mdev->p_uuid[UI_SIZE];
2157 ch_self = mdev->comm_bm_set;
2158
Philipp Reisner89e58e72011-01-19 13:12:45 +01002159 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160 case ASB_CONSENSUS:
2161 case ASB_DISCARD_SECONDARY:
2162 case ASB_CALL_HELPER:
2163 dev_err(DEV, "Configuration error.\n");
2164 break;
2165 case ASB_DISCONNECT:
2166 break;
2167 case ASB_DISCARD_YOUNGER_PRI:
2168 if (self == 0 && peer == 1) {
2169 rv = -1;
2170 break;
2171 }
2172 if (self == 1 && peer == 0) {
2173 rv = 1;
2174 break;
2175 }
2176 /* Else fall through to one of the other strategies... */
2177 case ASB_DISCARD_OLDER_PRI:
2178 if (self == 0 && peer == 1) {
2179 rv = 1;
2180 break;
2181 }
2182 if (self == 1 && peer == 0) {
2183 rv = -1;
2184 break;
2185 }
2186 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002187 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002188 "Using discard-least-changes instead\n");
2189 case ASB_DISCARD_ZERO_CHG:
2190 if (ch_peer == 0 && ch_self == 0) {
2191 rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
2192 ? -1 : 1;
2193 break;
2194 } else {
2195 if (ch_peer == 0) { rv = 1; break; }
2196 if (ch_self == 0) { rv = -1; break; }
2197 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002198 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 break;
2200 case ASB_DISCARD_LEAST_CHG:
2201 if (ch_self < ch_peer)
2202 rv = -1;
2203 else if (ch_self > ch_peer)
2204 rv = 1;
2205 else /* ( ch_self == ch_peer ) */
2206 /* Well, then use something else. */
2207 rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
2208 ? -1 : 1;
2209 break;
2210 case ASB_DISCARD_LOCAL:
2211 rv = -1;
2212 break;
2213 case ASB_DISCARD_REMOTE:
2214 rv = 1;
2215 }
2216
2217 return rv;
2218}
2219
2220static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2221{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002222 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223
Philipp Reisner89e58e72011-01-19 13:12:45 +01002224 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225 case ASB_DISCARD_YOUNGER_PRI:
2226 case ASB_DISCARD_OLDER_PRI:
2227 case ASB_DISCARD_LEAST_CHG:
2228 case ASB_DISCARD_LOCAL:
2229 case ASB_DISCARD_REMOTE:
2230 dev_err(DEV, "Configuration error.\n");
2231 break;
2232 case ASB_DISCONNECT:
2233 break;
2234 case ASB_CONSENSUS:
2235 hg = drbd_asb_recover_0p(mdev);
2236 if (hg == -1 && mdev->state.role == R_SECONDARY)
2237 rv = hg;
2238 if (hg == 1 && mdev->state.role == R_PRIMARY)
2239 rv = hg;
2240 break;
2241 case ASB_VIOLENTLY:
2242 rv = drbd_asb_recover_0p(mdev);
2243 break;
2244 case ASB_DISCARD_SECONDARY:
2245 return mdev->state.role == R_PRIMARY ? 1 : -1;
2246 case ASB_CALL_HELPER:
2247 hg = drbd_asb_recover_0p(mdev);
2248 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002249 enum drbd_state_rv rv2;
2250
2251 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002252 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2253 * we might be here in C_WF_REPORT_PARAMS which is transient.
2254 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002255 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2256 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257 drbd_khelper(mdev, "pri-lost-after-sb");
2258 } else {
2259 dev_warn(DEV, "Successfully gave up primary role.\n");
2260 rv = hg;
2261 }
2262 } else
2263 rv = hg;
2264 }
2265
2266 return rv;
2267}
2268
2269static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2270{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002271 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272
Philipp Reisner89e58e72011-01-19 13:12:45 +01002273 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002274 case ASB_DISCARD_YOUNGER_PRI:
2275 case ASB_DISCARD_OLDER_PRI:
2276 case ASB_DISCARD_LEAST_CHG:
2277 case ASB_DISCARD_LOCAL:
2278 case ASB_DISCARD_REMOTE:
2279 case ASB_CONSENSUS:
2280 case ASB_DISCARD_SECONDARY:
2281 dev_err(DEV, "Configuration error.\n");
2282 break;
2283 case ASB_VIOLENTLY:
2284 rv = drbd_asb_recover_0p(mdev);
2285 break;
2286 case ASB_DISCONNECT:
2287 break;
2288 case ASB_CALL_HELPER:
2289 hg = drbd_asb_recover_0p(mdev);
2290 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002291 enum drbd_state_rv rv2;
2292
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2294 * we might be here in C_WF_REPORT_PARAMS which is transient.
2295 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002296 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2297 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002298 drbd_khelper(mdev, "pri-lost-after-sb");
2299 } else {
2300 dev_warn(DEV, "Successfully gave up primary role.\n");
2301 rv = hg;
2302 }
2303 } else
2304 rv = hg;
2305 }
2306
2307 return rv;
2308}
2309
2310static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2311 u64 bits, u64 flags)
2312{
2313 if (!uuid) {
2314 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2315 return;
2316 }
2317 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2318 text,
2319 (unsigned long long)uuid[UI_CURRENT],
2320 (unsigned long long)uuid[UI_BITMAP],
2321 (unsigned long long)uuid[UI_HISTORY_START],
2322 (unsigned long long)uuid[UI_HISTORY_END],
2323 (unsigned long long)bits,
2324 (unsigned long long)flags);
2325}
2326
2327/*
2328 100 after split brain try auto recover
2329 2 C_SYNC_SOURCE set BitMap
2330 1 C_SYNC_SOURCE use BitMap
2331 0 no Sync
2332 -1 C_SYNC_TARGET use BitMap
2333 -2 C_SYNC_TARGET set BitMap
2334 -100 after split brain, disconnect
2335-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002336-1091 requires proto 91
2337-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002338 */
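/* Example, assuming two cleanly shut down nodes reconnecting: the current
 * UUIDs still match, neither side carries a bitmap UUID or a crashed-primary
 * flag, so rule 40 applies with rct == 0 and drbd_uuid_compare() returns 0,
 * i.e. no resync is needed. */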
2339static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2340{
2341 u64 self, peer;
2342 int i, j;
2343
2344 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2345 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2346
2347 *rule_nr = 10;
2348 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2349 return 0;
2350
2351 *rule_nr = 20;
2352 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2353 peer != UUID_JUST_CREATED)
2354 return -2;
2355
2356 *rule_nr = 30;
2357 if (self != UUID_JUST_CREATED &&
2358 (peer == UUID_JUST_CREATED || peer == (u64)0))
2359 return 2;
2360
2361 if (self == peer) {
2362 int rct, dc; /* roles at crash time */
2363
2364 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2365
Philipp Reisner31890f42011-01-19 14:12:51 +01002366 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002367 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368
2369 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2370 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2371 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2372 drbd_uuid_set_bm(mdev, 0UL);
2373
2374 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2375 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2376 *rule_nr = 34;
2377 } else {
2378 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2379 *rule_nr = 36;
2380 }
2381
2382 return 1;
2383 }
2384
2385 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2386
Philipp Reisner31890f42011-01-19 14:12:51 +01002387 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002388 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002389
2390 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2391 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2392 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2393
2394 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2395 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2396 mdev->p_uuid[UI_BITMAP] = 0UL;
2397
2398 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2399 *rule_nr = 35;
2400 } else {
2401 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2402 *rule_nr = 37;
2403 }
2404
2405 return -1;
2406 }
2407
2408 /* Common power [off|failure] */
2409 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2410 (mdev->p_uuid[UI_FLAGS] & 2);
2411 /* lowest bit is set when we were primary,
2412 * next bit (weight 2) is set when peer was primary */
2413 *rule_nr = 40;
2414
2415 switch (rct) {
2416 case 0: /* !self_pri && !peer_pri */ return 0;
2417 case 1: /* self_pri && !peer_pri */ return 1;
2418 case 2: /* !self_pri && peer_pri */ return -1;
2419 case 3: /* self_pri && peer_pri */
2420 dc = test_bit(DISCARD_CONCURRENT, &mdev->flags);
2421 return dc ? -1 : 1;
2422 }
2423 }
2424
2425 *rule_nr = 50;
2426 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2427 if (self == peer)
2428 return -1;
2429
2430 *rule_nr = 51;
2431 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2432 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002433 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002434 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2435 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2436 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 		/* The last P_SYNC_UUID did not get through. Undo the last 'start of
2438 		   resync as sync source' modifications of the peer's UUIDs. */
2439
Philipp Reisner31890f42011-01-19 14:12:51 +01002440 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002441 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002442
2443 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2444 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002445
2446 		dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2447 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2448
Philipp Reisnerb411b362009-09-25 16:07:19 -07002449 return -1;
2450 }
2451 }
2452
2453 *rule_nr = 60;
2454 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2455 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2456 peer = mdev->p_uuid[i] & ~((u64)1);
2457 if (self == peer)
2458 return -2;
2459 }
2460
2461 *rule_nr = 70;
2462 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2463 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2464 if (self == peer)
2465 return 1;
2466
2467 *rule_nr = 71;
2468 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2469 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002470 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002471 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2472 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2473 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002474 		/* The last P_SYNC_UUID did not get through. Undo the last 'start of
2475 		   resync as sync source' modifications of our UUIDs. */
2476
Philipp Reisner31890f42011-01-19 14:12:51 +01002477 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002478 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002479
2480 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2481 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2482
Philipp Reisner4a23f262011-01-11 17:42:17 +01002483 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002484 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2485 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2486
2487 return 1;
2488 }
2489 }
2490
2491
2492 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002493 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2495 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2496 if (self == peer)
2497 return 2;
2498 }
2499
2500 *rule_nr = 90;
2501 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2502 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2503 if (self == peer && self != ((u64)0))
2504 return 100;
2505
2506 *rule_nr = 100;
2507 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2508 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2509 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2510 peer = mdev->p_uuid[j] & ~((u64)1);
2511 if (self == peer)
2512 return -100;
2513 }
2514 }
2515
2516 return -1000;
2517}
2518
2519/* drbd_sync_handshake() returns the new conn state on success, or
2520 CONN_MASK (-1) on failure.
2521 */
2522static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2523 enum drbd_disk_state peer_disk) __must_hold(local)
2524{
2525 int hg, rule_nr;
2526 enum drbd_conns rv = C_MASK;
2527 enum drbd_disk_state mydisk;
2528
2529 mydisk = mdev->state.disk;
2530 if (mydisk == D_NEGOTIATING)
2531 mydisk = mdev->new_state_tmp.disk;
2532
2533 dev_info(DEV, "drbd_sync_handshake:\n");
2534 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2535 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2536 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2537
2538 hg = drbd_uuid_compare(mdev, &rule_nr);
2539
2540 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2541
2542 if (hg == -1000) {
2543 dev_alert(DEV, "Unrelated data, aborting!\n");
2544 return C_MASK;
2545 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002546 if (hg < -1000) {
2547 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002548 return C_MASK;
2549 }
2550
2551 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2552 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2553 int f = (hg == -100) || abs(hg) == 2;
2554 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2555 if (f)
2556 hg = hg*2;
2557 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2558 hg > 0 ? "source" : "target");
2559 }
2560
Adam Gandelman3a11a482010-04-08 16:48:23 -07002561 if (abs(hg) == 100)
2562 drbd_khelper(mdev, "initial-split-brain");
2563
Philipp Reisner89e58e72011-01-19 13:12:45 +01002564 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002565 int pcount = (mdev->state.role == R_PRIMARY)
2566 + (peer_role == R_PRIMARY);
2567 int forced = (hg == -100);
2568
2569 switch (pcount) {
2570 case 0:
2571 hg = drbd_asb_recover_0p(mdev);
2572 break;
2573 case 1:
2574 hg = drbd_asb_recover_1p(mdev);
2575 break;
2576 case 2:
2577 hg = drbd_asb_recover_2p(mdev);
2578 break;
2579 }
2580 if (abs(hg) < 100) {
2581 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2582 "automatically solved. Sync from %s node\n",
2583 pcount, (hg < 0) ? "peer" : "this");
2584 if (forced) {
2585 dev_warn(DEV, "Doing a full sync, since"
2586 			     " UUIDs were ambiguous.\n");
2587 hg = hg*2;
2588 }
2589 }
2590 }
2591
2592 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002593 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002594 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002595 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002596 hg = 1;
2597
2598 if (abs(hg) < 100)
2599 dev_warn(DEV, "Split-Brain detected, manually solved. "
2600 "Sync from %s node\n",
2601 (hg < 0) ? "peer" : "this");
2602 }
2603
2604 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002605 /* FIXME this log message is not correct if we end up here
2606 * after an attempted attach on a diskless node.
2607 * We just refuse to attach -- well, we drop the "connection"
2608 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002609 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002610 drbd_khelper(mdev, "split-brain");
2611 return C_MASK;
2612 }
2613
2614 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2615 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2616 return C_MASK;
2617 }
2618
2619 if (hg < 0 && /* by intention we do not use mydisk here. */
2620 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002621 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622 case ASB_CALL_HELPER:
2623 drbd_khelper(mdev, "pri-lost");
2624 /* fall through */
2625 case ASB_DISCONNECT:
2626 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2627 return C_MASK;
2628 case ASB_VIOLENTLY:
2629 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2630 			     " assumption\n");
2631 }
2632 }
2633
Philipp Reisner89e58e72011-01-19 13:12:45 +01002634 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002635 if (hg == 0)
2636 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2637 else
2638 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2639 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2640 abs(hg) >= 2 ? "full" : "bit-map based");
2641 return C_MASK;
2642 }
2643
Philipp Reisnerb411b362009-09-25 16:07:19 -07002644 if (abs(hg) >= 2) {
2645 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002646 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2647 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648 return C_MASK;
2649 }
2650
2651 if (hg > 0) { /* become sync source. */
2652 rv = C_WF_BITMAP_S;
2653 } else if (hg < 0) { /* become sync target */
2654 rv = C_WF_BITMAP_T;
2655 } else {
2656 rv = C_CONNECTED;
2657 if (drbd_bm_total_weight(mdev)) {
2658 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2659 drbd_bm_total_weight(mdev));
2660 }
2661 }
2662
2663 return rv;
2664}
2665
2666/* returns 1 if invalid */
2667static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2668{
2669 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2670 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2671 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2672 return 0;
2673
2674 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2675 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2676 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2677 return 1;
2678
2679 /* everything else is valid if they are equal on both sides. */
2680 if (peer == self)
2681 return 0;
2682
2683 	/* everything else is invalid. */
2684 return 1;
2685}
2686
Philipp Reisner02918be2010-08-20 14:35:10 +02002687static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002688{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002689 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002690 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002691 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002692 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2693
Philipp Reisnerb411b362009-09-25 16:07:19 -07002694 p_proto = be32_to_cpu(p->protocol);
2695 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2696 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2697 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002698 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002699 cf = be32_to_cpu(p->conn_flags);
2700 p_want_lose = cf & CF_WANT_LOSE;
2701
2702 clear_bit(CONN_DRY_RUN, &mdev->flags);
2703
2704 if (cf & CF_DRY_RUN)
2705 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002706
Philipp Reisner89e58e72011-01-19 13:12:45 +01002707 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002708 dev_err(DEV, "incompatible communication protocols\n");
2709 goto disconnect;
2710 }
2711
Philipp Reisner89e58e72011-01-19 13:12:45 +01002712 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002713 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2714 goto disconnect;
2715 }
2716
Philipp Reisner89e58e72011-01-19 13:12:45 +01002717 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002718 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2719 goto disconnect;
2720 }
2721
Philipp Reisner89e58e72011-01-19 13:12:45 +01002722 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002723 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2724 goto disconnect;
2725 }
2726
Philipp Reisner89e58e72011-01-19 13:12:45 +01002727 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2729 goto disconnect;
2730 }
2731
Philipp Reisner89e58e72011-01-19 13:12:45 +01002732 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002733 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2734 goto disconnect;
2735 }
2736
Philipp Reisner31890f42011-01-19 14:12:51 +01002737 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002738 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002739
2740 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002741 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742
2743 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2744 if (strcmp(p_integrity_alg, my_alg)) {
2745 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2746 goto disconnect;
2747 }
2748 dev_info(DEV, "data-integrity-alg: %s\n",
2749 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2750 }
2751
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002752 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002753
2754disconnect:
2755 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002756 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002757}
2758
2759/* helper function
2760 * input: alg name, feature name
2761 * return: NULL (alg name was "")
2762 * ERR_PTR(error) if something goes wrong
2763 * or the crypto hash ptr, if it worked out ok. */
2764struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2765 const char *alg, const char *name)
2766{
2767 struct crypto_hash *tfm;
2768
2769 if (!alg[0])
2770 return NULL;
2771
2772 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2773 if (IS_ERR(tfm)) {
2774 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2775 alg, name, PTR_ERR(tfm));
2776 return tfm;
2777 }
2778 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2779 crypto_free_hash(tfm);
2780 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2781 return ERR_PTR(-EINVAL);
2782 }
2783 return tfm;
2784}
2785
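/* receive_SyncParam() - digest the peer's sync parameters.  Depending on the
 * agreed protocol version the packet carries only the resync rate (apv < 88),
 * additionally the verify-alg (apv 88), the csums-alg (apv >= 89), and the
 * dynamic resync controller settings (apv >= 95).  Differing algorithms during
 * the initial handshake (C_WF_REPORT_PARAMS), or algorithms we cannot
 * allocate, lead to disconnect; otherwise the new settings are taken over
 * under peer_seq_lock. */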
Philipp Reisner02918be2010-08-20 14:35:10 +02002786static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002788 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002789 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 unsigned int header_size, data_size, exp_max_sz;
2791 struct crypto_hash *verify_tfm = NULL;
2792 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002793 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002794 int *rs_plan_s = NULL;
2795 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796
2797 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2798 : apv == 88 ? sizeof(struct p_rs_param)
2799 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002800 : apv <= 94 ? sizeof(struct p_rs_param_89)
2801 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002802
Philipp Reisner02918be2010-08-20 14:35:10 +02002803 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002804 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002805 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002806 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002807 }
2808
2809 if (apv <= 88) {
Philipp Reisner02918be2010-08-20 14:35:10 +02002810 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80);
2811 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002812 } else if (apv <= 94) {
Philipp Reisner02918be2010-08-20 14:35:10 +02002813 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80);
2814 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002816 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02002817 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80);
2818 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002819 D_ASSERT(data_size == 0);
2820 }
2821
2822 /* initialize verify_alg and csums_alg */
2823 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2824
Philipp Reisner02918be2010-08-20 14:35:10 +02002825 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002826 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002827
2828 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2829
2830 if (apv >= 88) {
2831 if (apv == 88) {
2832 if (data_size > SHARED_SECRET_MAX) {
2833 dev_err(DEV, "verify-alg too long, "
2834 "peer wants %u, accepting only %u byte\n",
2835 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002836 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837 }
2838
2839 if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002840 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002841
2842 /* we expect NUL terminated string */
2843 /* but just in case someone tries to be evil */
2844 D_ASSERT(p->verify_alg[data_size-1] == 0);
2845 p->verify_alg[data_size-1] = 0;
2846
2847 } else /* apv >= 89 */ {
2848 /* we still expect NUL terminated strings */
2849 /* but just in case someone tries to be evil */
2850 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2851 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2852 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2853 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2854 }
2855
2856 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2857 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2858 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2859 mdev->sync_conf.verify_alg, p->verify_alg);
2860 goto disconnect;
2861 }
2862 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2863 p->verify_alg, "verify-alg");
2864 if (IS_ERR(verify_tfm)) {
2865 verify_tfm = NULL;
2866 goto disconnect;
2867 }
2868 }
2869
2870 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2871 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2872 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2873 mdev->sync_conf.csums_alg, p->csums_alg);
2874 goto disconnect;
2875 }
2876 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2877 p->csums_alg, "csums-alg");
2878 if (IS_ERR(csums_tfm)) {
2879 csums_tfm = NULL;
2880 goto disconnect;
2881 }
2882 }
2883
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002884 if (apv > 94) {
2885 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2886 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2887 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2888 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2889 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002890
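			/* Size the plan-ahead fifo: c_plan_ahead is presumably in
			 * 0.1 second units and SLEEP_TIME is HZ/10 (both defined
			 * outside this file), so this should work out to one fifo
			 * slot per 100ms of plan-ahead. */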
2891 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2892 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2893 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2894 if (!rs_plan_s) {
2895 dev_err(DEV, "kzalloc of fifo_buffer failed\n");
2896 goto disconnect;
2897 }
2898 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002899 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
2901 spin_lock(&mdev->peer_seq_lock);
2902 /* lock against drbd_nl_syncer_conf() */
2903 if (verify_tfm) {
2904 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2905 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2906 crypto_free_hash(mdev->verify_tfm);
2907 mdev->verify_tfm = verify_tfm;
2908 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2909 }
2910 if (csums_tfm) {
2911 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2912 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2913 crypto_free_hash(mdev->csums_tfm);
2914 mdev->csums_tfm = csums_tfm;
2915 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2916 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002917 if (fifo_size != mdev->rs_plan_s.size) {
2918 kfree(mdev->rs_plan_s.values);
2919 mdev->rs_plan_s.values = rs_plan_s;
2920 mdev->rs_plan_s.size = fifo_size;
2921 mdev->rs_planed = 0;
2922 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 spin_unlock(&mdev->peer_seq_lock);
2924 }
2925
2926 return ok;
2927disconnect:
2928 /* just for completeness: actually not needed,
2929 * as this is not reached if csums_tfm was ok. */
2930 crypto_free_hash(csums_tfm);
2931 /* but free the verify_tfm again, if csums_tfm did not work out */
2932 crypto_free_hash(verify_tfm);
2933 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002934 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002935}
2936
Philipp Reisnerb411b362009-09-25 16:07:19 -07002937/* warn if the arguments differ by more than 12.5% */
2938static void warn_if_differ_considerably(struct drbd_conf *mdev,
2939 const char *s, sector_t a, sector_t b)
2940{
2941 sector_t d;
2942 if (a == 0 || b == 0)
2943 return;
2944 d = (a > b) ? (a - b) : (b - a);
2945 if (d > (a>>3) || d > (b>>3))
2946 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
2947 (unsigned long long)a, (unsigned long long)b);
2948}
2949
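/* receive_sizes() - process the peer's P_SIZES packet: remember the peer's
 * backing device size, on the initial handshake use the minimum of both
 * user-requested sizes, refuse to shrink a device with usable data while
 * connecting, re-determine our own capacity (or adopt the peer's if we are
 * diskless), and kick off a resync of the new area after an online grow. */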
Philipp Reisner02918be2010-08-20 14:35:10 +02002950static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002951{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002952 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002954 sector_t p_size, p_usize, my_usize;
2955 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01002956 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002957
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958 p_size = be64_to_cpu(p->d_size);
2959 p_usize = be64_to_cpu(p->u_size);
2960
2961 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
2962 dev_err(DEV, "some backing storage is needed\n");
2963 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002964 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002965 }
2966
2967 /* just store the peer's disk size for now.
2968 * we still need to figure out whether we accept that. */
2969 mdev->p_size = p_size;
2970
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971 if (get_ldev(mdev)) {
2972 warn_if_differ_considerably(mdev, "lower level device sizes",
2973 p_size, drbd_get_max_capacity(mdev->ldev));
2974 warn_if_differ_considerably(mdev, "user requested size",
2975 p_usize, mdev->ldev->dc.disk_size);
2976
2977 /* if this is the first connect, or an otherwise expected
2978 * param exchange, choose the minimum */
2979 if (mdev->state.conn == C_WF_REPORT_PARAMS)
2980 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
2981 p_usize);
2982
2983 my_usize = mdev->ldev->dc.disk_size;
2984
2985 if (mdev->ldev->dc.disk_size != p_usize) {
2986 mdev->ldev->dc.disk_size = p_usize;
2987 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
2988 (unsigned long)mdev->ldev->dc.disk_size);
2989 }
2990
2991 /* Never shrink a device with usable data during connect.
2992 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01002993 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07002994 drbd_get_capacity(mdev->this_bdev) &&
2995 mdev->state.disk >= D_OUTDATED &&
2996 mdev->state.conn < C_CONNECTED) {
2997 dev_err(DEV, "The peer's disk size is too small!\n");
2998 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2999 mdev->ldev->dc.disk_size = my_usize;
3000 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003001 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003002 }
3003 put_ldev(mdev);
3004 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005
Philipp Reisnere89b5912010-03-24 17:11:33 +01003006 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003007 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003008 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003009 put_ldev(mdev);
3010 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003011 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012 drbd_md_sync(mdev);
3013 } else {
3014 /* I am diskless, need to accept the peer's size. */
3015 drbd_set_my_capacity(mdev, p_size);
3016 }
3017
Philipp Reisner99432fc2011-05-20 16:39:13 +02003018 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3019 drbd_reconsider_max_bio_size(mdev);
3020
Philipp Reisnerb411b362009-09-25 16:07:19 -07003021 if (get_ldev(mdev)) {
3022 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3023 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3024 ldsc = 1;
3025 }
3026
Philipp Reisnerb411b362009-09-25 16:07:19 -07003027 put_ldev(mdev);
3028 }
3029
3030 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3031 if (be64_to_cpu(p->c_size) !=
3032 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3033 /* we have different sizes, probably peer
3034 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003035 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003036 }
3037 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3038 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3039 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003040 mdev->state.disk >= D_INCONSISTENT) {
3041 if (ddsf & DDSF_NO_RESYNC)
3042 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3043 else
3044 resync_after_online_grow(mdev);
3045 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3047 }
3048 }
3049
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003050 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003051}
3052
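/* receive_uuids() - take over the peer's UUID set.  A diskless primary that
 * is still connecting only accepts a peer whose current UUID matches its
 * exposed data UUID; a just-created device (current UUID == UUID_JUST_CREATED)
 * may skip the initial sync when the peer sets the corresponding flag and the
 * protocol is recent enough; a diskless primary otherwise adopts the new
 * current UUID the peer generated for it. */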
Philipp Reisner02918be2010-08-20 14:35:10 +02003053static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003054{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003055 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003057 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3060
3061 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3062 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3063
3064 kfree(mdev->p_uuid);
3065 mdev->p_uuid = p_uuid;
3066
3067 if (mdev->state.conn < C_CONNECTED &&
3068 mdev->state.disk < D_INCONSISTENT &&
3069 mdev->state.role == R_PRIMARY &&
3070 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3071 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3072 (unsigned long long)mdev->ed_uuid);
3073 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003074 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003075 }
3076
3077 if (get_ldev(mdev)) {
3078 int skip_initial_sync =
3079 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003080 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3082 (p_uuid[UI_FLAGS] & 8);
3083 if (skip_initial_sync) {
3084 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3085 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003086 "clear_n_write from receive_uuids",
3087 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003088 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3089 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3090 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3091 CS_VERBOSE, NULL);
3092 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003093 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003094 }
3095 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003096 } else if (mdev->state.disk < D_INCONSISTENT &&
3097 mdev->state.role == R_PRIMARY) {
3098 /* I am a diskless primary, the peer just created a new current UUID
3099 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003100 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 }
3102
3103 /* Before we test for the disk state, we should wait until a possibly
3104 ongoing cluster-wide state change is finished. That is important if
3105 we are primary and are detaching from our disk. We need to see the
3106 new disk state... */
3107 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3108 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003109 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3110
3111 if (updated_uuids)
3112 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003114 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115}
3116
3117/**
3118 * convert_state() - Converts the peer's view of the cluster state to our point of view
3119 * @ps: The state as seen by the peer.
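 *
 * Example: if the peer reports role=Primary, peer=Secondary, disk=UpToDate,
 * pdsk=Inconsistent, the converted state reads role=Secondary, peer=Primary,
 * disk=Inconsistent, pdsk=UpToDate: role/peer and disk/pdsk are simply
 * swapped, and asymmetric connection states map to their counterpart.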
3120 */
3121static union drbd_state convert_state(union drbd_state ps)
3122{
3123 union drbd_state ms;
3124
3125 static enum drbd_conns c_tab[] = {
3126 [C_CONNECTED] = C_CONNECTED,
3127
3128 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3129 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3130 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3131 [C_VERIFY_S] = C_VERIFY_T,
3132 [C_MASK] = C_MASK,
3133 };
3134
3135 ms.i = ps.i;
3136
3137 ms.conn = c_tab[ps.conn];
3138 ms.peer = ps.role;
3139 ms.role = ps.peer;
3140 ms.pdsk = ps.disk;
3141 ms.disk = ps.pdsk;
3142 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3143
3144 return ms;
3145}
3146
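/* receive_req_state() - the peer requests a cluster-wide state change.
 * Convert mask and value to our point of view, apply them locally and send
 * the result back; if we hold DISCARD_CONCURRENT while our own cluster-wide
 * change is in flight, answer with SS_CONCURRENT_ST_CHG instead. */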
Philipp Reisner02918be2010-08-20 14:35:10 +02003147static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003149 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003151 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003152
Philipp Reisnerb411b362009-09-25 16:07:19 -07003153 mask.i = be32_to_cpu(p->mask);
3154 val.i = be32_to_cpu(p->val);
3155
3156 if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
3157 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3158 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003159 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003160 }
3161
3162 mask = convert_state(mask);
3163 val = convert_state(val);
3164
3165 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3166
3167 drbd_send_sr_reply(mdev, rv);
3168 drbd_md_sync(mdev);
3169
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003170 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171}
3172
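/* receive_state() - fold the peer's P_STATE packet into our own state.
 * This is where a freshly established connection decides, via
 * drbd_sync_handshake(), whether and in which direction a resync is needed,
 * and where races with a peer that is still finishing or aborting a resync
 * are smoothed over. */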
Philipp Reisner02918be2010-08-20 14:35:10 +02003173static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003174{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003175 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003176 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003177 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003178 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003179 int rv;
3180
Philipp Reisnerb411b362009-09-25 16:07:19 -07003181 peer_state.i = be32_to_cpu(p->state);
3182
3183 real_peer_disk = peer_state.disk;
3184 if (peer_state.disk == D_NEGOTIATING) {
3185 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3186 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3187 }
3188
Philipp Reisner87eeee42011-01-19 14:16:30 +01003189 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003190 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003191 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003192 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003194 /* peer says his disk is uptodate, while we think it is inconsistent,
3195 * and this happens while we think we have a sync going on. */
3196 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3197 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3198 /* If we are (becoming) SyncSource, but peer is still in sync
3199 * preparation, ignore its uptodate-ness to avoid flapping, it
3200 * will change to inconsistent once the peer reaches active
3201 * syncing states.
3202 * It may have changed syncer-paused flags, however, so we
3203 * cannot ignore this completely. */
3204 if (peer_state.conn > C_CONNECTED &&
3205 peer_state.conn < C_SYNC_SOURCE)
3206 real_peer_disk = D_INCONSISTENT;
3207
3208 /* if peer_state changes to connected at the same time,
3209 * it explicitly notifies us that it finished resync.
3210 * Maybe we should finish it up, too? */
3211 else if (os.conn >= C_SYNC_SOURCE &&
3212 peer_state.conn == C_CONNECTED) {
3213 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3214 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003215 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003216 }
3217 }
3218
3219 /* peer says his disk is inconsistent, while we think it is uptodate,
3220 * and this happens while the peer still thinks we have a sync going on,
3221 * but we think we are already done with the sync.
3222 * We ignore this to avoid flapping pdsk.
3223 * This should not happen, if the peer is a recent version of drbd. */
3224 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3225 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3226 real_peer_disk = D_UP_TO_DATE;
3227
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003228 if (ns.conn == C_WF_REPORT_PARAMS)
3229 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230
Philipp Reisner67531712010-10-27 12:21:30 +02003231 if (peer_state.conn == C_AHEAD)
3232 ns.conn = C_BEHIND;
3233
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3235 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3236 int cr; /* consider resync */
3237
3238 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003239 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240 /* if we had an established connection
3241 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003242 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003244 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003245 /* if we have both been inconsistent, and the peer has been
3246 * forced to be UpToDate with --overwrite-data */
3247 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3248 /* if we had been plain connected, and the admin requested to
3249 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003250 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251 (peer_state.conn >= C_STARTING_SYNC_S &&
3252 peer_state.conn <= C_WF_BITMAP_T));
3253
3254 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003255 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256
3257 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003258 if (ns.conn == C_MASK) {
3259 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003261 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 } else if (peer_state.disk == D_NEGOTIATING) {
3263 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3264 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003265 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003266 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003267 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003268 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003269 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003270 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003271 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003272 }
3273 }
3274 }
3275
Philipp Reisner87eeee42011-01-19 14:16:30 +01003276 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003277 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003278 goto retry;
3279 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003280 ns.peer = peer_state.role;
3281 ns.pdsk = real_peer_disk;
3282 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003283 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003284 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003285 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3286 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003287 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003288 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003289 for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003290 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003291 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3292 tl_clear(mdev);
3293 drbd_uuid_new_current(mdev);
3294 clear_bit(NEW_CUR_UUID, &mdev->flags);
3295 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003296 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003297 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003298 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003300 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301
3302 if (rv < SS_SUCCESS) {
3303 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003304 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305 }
3306
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003307 if (os.conn > C_WF_REPORT_PARAMS) {
3308 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 peer_state.disk != D_NEGOTIATING ) {
3310 /* we want resync, peer has not yet decided to sync... */
3311 /* Nowadays only used when forcing a node into primary role and
3312 setting its disk to UpToDate with that */
3313 drbd_send_uuids(mdev);
3314 drbd_send_state(mdev);
3315 }
3316 }
3317
Philipp Reisner89e58e72011-01-19 13:12:45 +01003318 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319
3320 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3321
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003322 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323}
3324
Philipp Reisner02918be2010-08-20 14:35:10 +02003325static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003327 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328
3329 wait_event(mdev->misc_wait,
3330 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003331 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332 mdev->state.conn < C_CONNECTED ||
3333 mdev->state.disk < D_NEGOTIATING);
3334
3335 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3336
Philipp Reisnerb411b362009-09-25 16:07:19 -07003337 /* Here the _drbd_uuid_ functions are right, current should
3338 _not_ be rotated into the history */
3339 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3340 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3341 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3342
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003343 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003344 drbd_start_resync(mdev, C_SYNC_TARGET);
3345
3346 put_ldev(mdev);
3347 } else
3348 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3349
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003350 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003351}
3352
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003353/**
3354 * receive_bitmap_plain() - receive one chunk of an uncompressed bitmap transfer
3355 *
3356 * Return 0 when done, 1 when another iteration is needed, and a negative error
3357 * code upon failure.
3358 */
3359static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003360receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3361 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003362{
3363 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3364 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003365 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366
Philipp Reisner02918be2010-08-20 14:35:10 +02003367 if (want != data_size) {
3368 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003369 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003370 }
3371 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003372 return 0;
3373 err = drbd_recv(mdev, buffer, want);
3374 if (err != want) {
3375 if (err >= 0)
3376 err = -EIO;
3377 return err;
3378 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003379
3380 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3381
3382 c->word_offset += num_words;
3383 c->bit_offset = c->word_offset * BITS_PER_LONG;
3384 if (c->bit_offset > c->bm_bits)
3385 c->bit_offset = c->bm_bits;
3386
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003387 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003388}
3389
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003390/**
3391 * recv_bm_rle_bits() - decode RLE+VLI encoded bitmap bits and set them in the local bitmap
3392 *
3393 * Return 0 when done, 1 when another iteration is needed, and a negative error
3394 * code upon failure.
3395 */
3396static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003397recv_bm_rle_bits(struct drbd_conf *mdev,
3398 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003399 struct bm_xfer_ctx *c,
3400 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003401{
3402 struct bitstream bs;
3403 u64 look_ahead;
3404 u64 rl;
3405 u64 tmp;
3406 unsigned long s = c->bit_offset;
3407 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003408 int toggle = DCBP_get_start(p);
3409 int have;
3410 int bits;
3411
3412 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3413
3414 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3415 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003416 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003417
3418 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3419 bits = vli_decode_bits(&rl, look_ahead);
3420 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003421 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003422
3423 if (toggle) {
3424 e = s + rl -1;
3425 if (e >= c->bm_bits) {
3426 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003427 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003428 }
3429 _drbd_bm_set_bits(mdev, s, e);
3430 }
3431
3432 if (have < bits) {
3433 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3434 have, bits, look_ahead,
3435 (unsigned int)(bs.cur.b - p->code),
3436 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003437 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438 }
3439 look_ahead >>= bits;
3440 have -= bits;
3441
3442 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3443 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003444 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003445 look_ahead |= tmp << have;
3446 have += bits;
3447 }
3448
3449 c->bit_offset = s;
3450 bm_xfer_ctx_bit_to_word_offset(c);
3451
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003452 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453}
3454
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003455/**
3456 * decode_bitmap_c() - decode a compressed bitmap chunk according to its encoding (currently only RLE_VLI_Bits)
3457 *
3458 * Return 0 when done, 1 when another iteration is needed, and a negative error
3459 * code upon failure.
3460 */
3461static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003462decode_bitmap_c(struct drbd_conf *mdev,
3463 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003464 struct bm_xfer_ctx *c,
3465 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003466{
3467 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003468 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003469
3470 /* other variants had been implemented for evaluation,
3471 * but have been dropped as this one turned out to be "best"
3472 * during all our tests. */
3473
3474 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3475 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003476 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003477}
3478
3479void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3480 const char *direction, struct bm_xfer_ctx *c)
3481{
3482 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003483 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3485 + c->bm_words * sizeof(long);
3486 unsigned total = c->bytes[0] + c->bytes[1];
3487 unsigned r;
3488
3489 /* total can not be zero. but just in case: */
3490 if (total == 0)
3491 return;
3492
3493 /* don't report if not compressed */
3494 if (total >= plain)
3495 return;
3496
3497 /* total < plain. check for overflow, still */
3498 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3499 : (1000 * total / plain);
3500
3501 if (r > 1000)
3502 r = 1000;
3503
3504 r = 1000 - r;
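	/* r now holds the savings in tenths of a percent; e.g. plain = 4096
	 * and total = 1024 gives r = 1000 - 250 = 750, reported below as
	 * "compression: 75.0%". */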
3505 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3506 "total %u; compression: %u.%u%%\n",
3507 direction,
3508 c->bytes[1], c->packets[1],
3509 c->bytes[0], c->packets[0],
3510 total, r/10, r % 10);
3511}
3512
3513/* Since we are processing the bitfield from lower addresses to higher,
3514 it does not matter if we process it in 32 bit chunks or 64 bit
3515 chunks as long as it is little endian. (Understand it as byte stream,
3516 beginning with the lowest byte...) If we used big endian
3517 we would need to process it from the highest address to the lowest,
3518 in order to be agnostic to the 32 vs 64 bits issue.
3519
3520 returns 0 on failure, 1 if we successfully received it. */
Philipp Reisner02918be2010-08-20 14:35:10 +02003521static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522{
3523 struct bm_xfer_ctx c;
3524 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003525 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003526 int ok = false;
Philipp Reisnere42325a2011-01-19 13:55:45 +01003527 struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003529 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3530 /* you are supposed to send additional out-of-sync information
3531 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532
3533 /* maybe we should use some per thread scratch page,
3534 * and allocate that during initial device creation? */
3535 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3536 if (!buffer) {
3537 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3538 goto out;
3539 }
3540
3541 c = (struct bm_xfer_ctx) {
3542 .bm_bits = drbd_bm_bits(mdev),
3543 .bm_words = drbd_bm_words(mdev),
3544 };
3545
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003546 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003547 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003548 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003549 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550 /* MAYBE: sanity check that we speak proto >= 90,
3551 * and the feature is enabled! */
3552 struct p_compressed_bm *p;
3553
Philipp Reisner02918be2010-08-20 14:35:10 +02003554 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555 dev_err(DEV, "ReportCBitmap packet too large\n");
3556 goto out;
3557 }
3558 /* use the page buff */
3559 p = buffer;
3560 memcpy(p, h, sizeof(*h));
Philipp Reisner02918be2010-08-20 14:35:10 +02003561 if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003563 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3564 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003565 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003566 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003567 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003568 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003569 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003570 goto out;
3571 }
3572
Philipp Reisner02918be2010-08-20 14:35:10 +02003573 c.packets[cmd == P_BITMAP]++;
3574 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003575
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003576 if (err <= 0) {
3577 if (err < 0)
3578 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003579 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003580 }
Philipp Reisner02918be2010-08-20 14:35:10 +02003581 if (!drbd_recv_header(mdev, &cmd, &data_size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003583 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584
3585 INFO_bm_xfer_stats(mdev, "receive", &c);
3586
3587 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003588 enum drbd_state_rv rv;
3589
Philipp Reisnerb411b362009-09-25 16:07:19 -07003590 ok = !drbd_send_bitmap(mdev);
3591 if (!ok)
3592 goto out;
3593 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003594 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3595 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003596 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3597 /* admin may have requested C_DISCONNECTING,
3598 * other threads may have noticed network errors */
3599 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3600 drbd_conn_str(mdev->state.conn));
3601 }
3602
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003603 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003605 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3607 drbd_start_resync(mdev, C_SYNC_SOURCE);
3608 free_page((unsigned long) buffer);
3609 return ok;
3610}
3611
Philipp Reisner02918be2010-08-20 14:35:10 +02003612static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003613{
3614 /* TODO zero copy sink :) */
3615 static char sink[128];
3616 int size, want, r;
3617
Philipp Reisner02918be2010-08-20 14:35:10 +02003618 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3619 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620
Philipp Reisner02918be2010-08-20 14:35:10 +02003621 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622 while (size > 0) {
3623 want = min_t(int, size, sizeof(sink));
3624 r = drbd_recv(mdev, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003625 if (!expect(r > 0))
3626 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627 size -= r;
3628 }
3629 return size == 0;
3630}
3631
Philipp Reisner02918be2010-08-20 14:35:10 +02003632static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634 /* Make sure we've acked all the TCP data associated
3635 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003636 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003638 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003639}
3640
Philipp Reisner73a01a12010-10-27 14:33:00 +02003641static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3642{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003643 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003644
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003645 switch (mdev->state.conn) {
3646 case C_WF_SYNC_UUID:
3647 case C_WF_BITMAP_T:
3648 case C_BEHIND:
3649 break;
3650 default:
3651 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3652 drbd_conn_str(mdev->state.conn));
3653 }
3654
Philipp Reisner73a01a12010-10-27 14:33:00 +02003655 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3656
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003657 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003658}
3659
Philipp Reisner02918be2010-08-20 14:35:10 +02003660typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661
Philipp Reisner02918be2010-08-20 14:35:10 +02003662struct data_cmd {
3663 int expect_payload;
3664 size_t pkt_size;
3665 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666};
3667
Philipp Reisner02918be2010-08-20 14:35:10 +02003668static struct data_cmd drbd_cmd_handler[] = {
3669 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3670 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3671 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3672 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
3673 [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
3674 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
3675 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote },
3676 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3677 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3678 [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam },
3679 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam },
3680 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3681 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3682 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3683 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3684 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3685 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3686 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3687 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3688 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3689 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003690 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003691 /* anything missing from this table is in
3692 * the asender_tbl, see get_asender_cmd */
3693 [P_MAX_CMD] = { 0, 0, NULL },
3694};
3695
3696/* All handler functions that expect a sub-header get that sub-header in
Philipp Reisnere42325a2011-01-19 13:55:45 +01003697 mdev->tconn->data.rbuf.header.head.payload.
Philipp Reisner02918be2010-08-20 14:35:10 +02003698
Philipp Reisnere42325a2011-01-19 13:55:45 +01003699 Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
Philipp Reisner02918be2010-08-20 14:35:10 +02003700 p_header, but they may not rely on that, since there is also p_header95.
3701 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702
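/* drbdd() - the receiver main loop: read a packet header, validate the
 * command against drbd_cmd_handler[], read the sub-header if the handler
 * expects one, and dispatch to the handler with the remaining payload size.
 * Any failure forces the connection into C_PROTOCOL_ERROR. */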
3703static void drbdd(struct drbd_conf *mdev)
3704{
Philipp Reisnerc0129492011-01-19 16:58:16 +01003705 struct p_header *header = &mdev->tconn->data.rbuf.header;
Philipp Reisner02918be2010-08-20 14:35:10 +02003706 unsigned int packet_size;
3707 enum drbd_packets cmd;
3708 size_t shs; /* sub header size */
3709 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003711 while (get_t_state(&mdev->tconn->receiver) == RUNNING) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712 drbd_thread_current_set_cpu(mdev);
Philipp Reisner02918be2010-08-20 14:35:10 +02003713 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3714 goto err_out;
3715
3716 if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
3717 dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
3718 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003719 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720
Philipp Reisnerc0129492011-01-19 16:58:16 +01003721 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003722 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3723 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3724 goto err_out;
3725 }
3726
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003727 if (shs) {
Philipp Reisnerc0129492011-01-19 16:58:16 +01003728 rv = drbd_recv(mdev, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003729 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003730 if (!signal_pending(current))
3731 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003732 goto err_out;
3733 }
3734 }
3735
Philipp Reisner02918be2010-08-20 14:35:10 +02003736 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
3737
3738 if (unlikely(!rv)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003739 dev_err(DEV, "error receiving %s, l: %d!\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02003740 cmdname(cmd), packet_size);
3741 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003742 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003743 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744
Philipp Reisner02918be2010-08-20 14:35:10 +02003745 if (0) {
3746 err_out:
3747 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 }
Lars Ellenberg856c50c2010-10-14 13:37:40 +02003749 /* If we leave here, we probably want to update at least the
3750 * "Connected" indicator on stable storage. Do so explicitly here. */
3751 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003752}
3753
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003754void drbd_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003755{
3756 struct drbd_wq_barrier barr;
3757
3758 barr.w.cb = w_prev_work_done;
3759 init_completion(&barr.done);
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003760 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003761 wait_for_completion(&barr.done);
3762}
3763
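/* drbd_disconnect() - clean up after the connection went away: stop the
 * asender, close the socket, wait for pending epoch entries, cancel the
 * resync, flush the worker queue, clear the transfer log (unless IO is
 * suspended), possibly outdate the peer, and transition to C_UNCONNECTED,
 * or all the way to C_STANDALONE if the admin requested C_DISCONNECTING. */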
3764static void drbd_disconnect(struct drbd_conf *mdev)
3765{
3766 enum drbd_fencing_p fp;
3767 union drbd_state os, ns;
3768 int rv = SS_UNKNOWN_ERROR;
3769 unsigned int i;
3770
3771 if (mdev->state.conn == C_STANDALONE)
3772 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
3774 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003775 drbd_thread_stop(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776 drbd_free_sock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003777
Philipp Reisner85719572010-07-21 10:20:17 +02003778 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003779 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3781 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3782 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003783 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784
3785 /* We do not have data structures that would allow us to
3786 * get the rs_pending_cnt down to 0 again.
3787 * * On C_SYNC_TARGET we do not have any data structures describing
3788 * the pending RSDataRequest's we have sent.
3789 * * On C_SYNC_SOURCE there is no data structure that tracks
3790 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3791 * And no, it is not the sum of the reference counts in the
3792 * resync_LRU. The resync_LRU tracks the whole operation including
3793 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3794 * on the fly. */
3795 drbd_rs_cancel_all(mdev);
3796 mdev->rs_total = 0;
3797 mdev->rs_failed = 0;
3798 atomic_set(&mdev->rs_pending_cnt, 0);
3799 wake_up(&mdev->misc_wait);
3800
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003801 del_timer(&mdev->request_timer);
3802
Philipp Reisnerb411b362009-09-25 16:07:19 -07003803 /* make sure syncer is stopped and w_resume_next_sg queued */
3804 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805 resync_timer_fn((unsigned long)mdev);
3806
Philipp Reisnerb411b362009-09-25 16:07:19 -07003807 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3808 * w_make_resync_request etc. which may still be on the worker queue
3809 * to be "canceled" */
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003810 drbd_flush_workqueue(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811
3812 /* This also does reclaim_net_ee(). If we do this too early, we might
3813 * miss some resync ee and pages.*/
3814 drbd_process_done_ee(mdev);
3815
3816 kfree(mdev->p_uuid);
3817 mdev->p_uuid = NULL;
3818
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003819 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 tl_clear(mdev);
3821
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822 dev_info(DEV, "Connection closed\n");
3823
3824 drbd_md_sync(mdev);
3825
3826 fp = FP_DONT_CARE;
3827 if (get_ldev(mdev)) {
3828 fp = mdev->ldev->dc.fencing;
3829 put_ldev(mdev);
3830 }
3831
Philipp Reisner87f7be42010-06-11 13:56:33 +02003832 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3833 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834
Philipp Reisner87eeee42011-01-19 14:16:30 +01003835 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836 os = mdev->state;
3837 if (os.conn >= C_UNCONNECTED) {
3838 /* Do not restart in case we are C_DISCONNECTING */
3839 ns = os;
3840 ns.conn = C_UNCONNECTED;
3841 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3842 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003843 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844
3845 if (os.conn == C_DISCONNECTING) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01003846 wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847
Philipp Reisnera0638452011-01-19 14:31:32 +01003848 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
3849 mdev->tconn->cram_hmac_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850
Philipp Reisner89e58e72011-01-19 13:12:45 +01003851 kfree(mdev->tconn->net_conf);
3852 mdev->tconn->net_conf = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3854 }
3855
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003856 /* serialize with bitmap writeout triggered by the state change,
3857 * if any. */
3858 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3859
Philipp Reisnerb411b362009-09-25 16:07:19 -07003860 /* tcp_close and release of sendpage pages can be deferred. I don't
3861 * want to use SO_LINGER, because apparently it can be deferred for
3862 * more than 20 seconds (longest time I checked).
3863 *
3864 * Actually we don't care for exactly when the network stack does its
3865 * put_page(), but release our reference on these pages right here.
3866 */
3867 i = drbd_release_ee(mdev, &mdev->net_ee);
3868 if (i)
3869 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003870 i = atomic_read(&mdev->pp_in_use_by_net);
3871 if (i)
3872 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873 i = atomic_read(&mdev->pp_in_use);
3874 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003875 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876
3877 D_ASSERT(list_empty(&mdev->read_ee));
3878 D_ASSERT(list_empty(&mdev->active_ee));
3879 D_ASSERT(list_empty(&mdev->sync_ee));
3880 D_ASSERT(list_empty(&mdev->done_ee));
3881
3882 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3883 atomic_set(&mdev->current_epoch->epoch_size, 0);
3884 D_ASSERT(list_empty(&mdev->current_epoch->list));
3885}
3886
3887/*
3888 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3889 * we can agree on is stored in agreed_pro_version.
3890 *
3891 * feature flags and the reserved array should be enough room for future
3892 * enhancements of the handshake protocol, and possible plugins...
3893 *
3894 * for now, they are expected to be zero, but ignored.
3895 */
3896static int drbd_send_handshake(struct drbd_conf *mdev)
3897{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003898 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003899 struct p_handshake *p = &mdev->tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003900 int ok;
3901
Philipp Reisnere42325a2011-01-19 13:55:45 +01003902 if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903 dev_err(DEV, "interrupted during initial handshake\n");
3904 return 0; /* interrupted. not ok. */
3905 }
3906
Philipp Reisnere42325a2011-01-19 13:55:45 +01003907 if (mdev->tconn->data.socket == NULL) {
3908 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909 return 0;
3910 }
3911
3912 memset(p, 0, sizeof(*p));
3913 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3914 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisnerc0129492011-01-19 16:58:16 +01003915 ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE,
3916				     &p->head, sizeof(*p), 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01003917 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003918 return ok;
3919}
3920
3921/*
3922 * return values:
3923 * 1 yes, we have a valid connection
3924 * 0 oops, did not work out, please try again
3925 * -1 peer talks different language,
3926 * no point in trying again, please go standalone.
3927 */
3928static int drbd_do_handshake(struct drbd_conf *mdev)
3929{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003930 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003931 struct p_handshake *p = &mdev->tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02003932 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
3933 unsigned int length;
3934 enum drbd_packets cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935 int rv;
3936
3937 rv = drbd_send_handshake(mdev);
3938 if (!rv)
3939 return 0;
3940
Philipp Reisner02918be2010-08-20 14:35:10 +02003941 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003942 if (!rv)
3943 return 0;
3944
Philipp Reisner02918be2010-08-20 14:35:10 +02003945 if (cmd != P_HAND_SHAKE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003946 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02003947 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948 return -1;
3949 }
3950
Philipp Reisner02918be2010-08-20 14:35:10 +02003951 if (length != expect) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003952 dev_err(DEV, "expected HandShake length: %u, received: %u\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02003953 expect, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003954 return -1;
3955 }
3956
3957 rv = drbd_recv(mdev, &p->head.payload, expect);
3958
3959 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003960 if (!signal_pending(current))
3961 dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003962 return 0;
3963 }
3964
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965 p->protocol_min = be32_to_cpu(p->protocol_min);
3966 p->protocol_max = be32_to_cpu(p->protocol_max);
3967 if (p->protocol_max == 0)
3968 p->protocol_max = p->protocol_min;
3969
3970 if (PRO_VERSION_MAX < p->protocol_min ||
3971 PRO_VERSION_MIN > p->protocol_max)
3972 goto incompat;
3973
Philipp Reisner31890f42011-01-19 14:12:51 +01003974 mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003975
3976 dev_info(DEV, "Handshake successful: "
Philipp Reisner31890f42011-01-19 14:12:51 +01003977 "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003978
3979 return 1;
3980
3981 incompat:
3982 dev_err(DEV, "incompatible DRBD dialects: "
3983 "I support %d-%d, peer supports %d-%d\n",
3984 PRO_VERSION_MIN, PRO_VERSION_MAX,
3985 p->protocol_min, p->protocol_max);
3986 return -1;
3987}
3988
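/*
 * Illustrative sketch only, not part of the driver: the version
 * agreement rule implemented by drbd_do_handshake() above, written as a
 * stand-alone helper.  The parameter names local_min, local_max,
 * peer_min and peer_max are hypothetical; they stand for
 * PRO_VERSION_MIN, PRO_VERSION_MAX and the range advertised by the
 * peer.  Kept under #if 0 so it is never built.
 */
#if 0
static int example_agree_pro_version(int local_min, int local_max,
				     int peer_min, int peer_max)
{
	/* very old peers announce only a single supported version */
	if (peer_max == 0)
		peer_max = peer_min;

	/* the two ranges do not overlap: incompatible, go standalone */
	if (local_max < peer_min || local_min > peer_max)
		return -1;

	/* otherwise agree on the highest version both sides understand */
	return min_t(int, local_max, peer_max);
}
#endif
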
3989#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
3990static int drbd_do_auth(struct drbd_conf *mdev)
3991{
3992	dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
3993 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01003994 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995}
3996#else
3997#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01003998
3999/* Return value:
4000 1 - auth succeeded,
4001 0 - failed, try again (network error),
4002 -1 - auth failed, don't try again.
4003*/
4004
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005static int drbd_do_auth(struct drbd_conf *mdev)
4006{
4007 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4008 struct scatterlist sg;
4009 char *response = NULL;
4010 char *right_response = NULL;
4011 char *peers_ch = NULL;
Philipp Reisner89e58e72011-01-19 13:12:45 +01004012 unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 unsigned int resp_size;
4014 struct hash_desc desc;
Philipp Reisner02918be2010-08-20 14:35:10 +02004015 enum drbd_packets cmd;
4016 unsigned int length;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 int rv;
4018
Philipp Reisnera0638452011-01-19 14:31:32 +01004019 desc.tfm = mdev->tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 desc.flags = 0;
4021
Philipp Reisnera0638452011-01-19 14:31:32 +01004022 rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm,
Philipp Reisner89e58e72011-01-19 13:12:45 +01004023 (u8 *)mdev->tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 if (rv) {
4025 dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004026 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027 goto fail;
4028 }
4029
4030 get_random_bytes(my_challenge, CHALLENGE_LEN);
4031
4032 rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
4033 if (!rv)
4034 goto fail;
4035
Philipp Reisner02918be2010-08-20 14:35:10 +02004036 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037 if (!rv)
4038 goto fail;
4039
Philipp Reisner02918be2010-08-20 14:35:10 +02004040 if (cmd != P_AUTH_CHALLENGE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004042 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043 rv = 0;
4044 goto fail;
4045 }
4046
Philipp Reisner02918be2010-08-20 14:35:10 +02004047 if (length > CHALLENGE_LEN * 2) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004048		dev_err(DEV, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004049 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 goto fail;
4051 }
4052
Philipp Reisner02918be2010-08-20 14:35:10 +02004053 peers_ch = kmalloc(length, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054 if (peers_ch == NULL) {
4055 dev_err(DEV, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004056 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057 goto fail;
4058 }
4059
Philipp Reisner02918be2010-08-20 14:35:10 +02004060 rv = drbd_recv(mdev, peers_ch, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004061
Philipp Reisner02918be2010-08-20 14:35:10 +02004062 if (rv != length) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004063 if (!signal_pending(current))
4064 dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065 rv = 0;
4066 goto fail;
4067 }
4068
Philipp Reisnera0638452011-01-19 14:31:32 +01004069 resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070 response = kmalloc(resp_size, GFP_NOIO);
4071 if (response == NULL) {
4072 dev_err(DEV, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004073 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074 goto fail;
4075 }
4076
4077 sg_init_table(&sg, 1);
Philipp Reisner02918be2010-08-20 14:35:10 +02004078 sg_set_buf(&sg, peers_ch, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004079
4080 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4081 if (rv) {
4082 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004083 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084 goto fail;
4085 }
4086
4087 rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
4088 if (!rv)
4089 goto fail;
4090
Philipp Reisner02918be2010-08-20 14:35:10 +02004091 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 if (!rv)
4093 goto fail;
4094
Philipp Reisner02918be2010-08-20 14:35:10 +02004095 if (cmd != P_AUTH_RESPONSE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004097 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004098 rv = 0;
4099 goto fail;
4100 }
4101
Philipp Reisner02918be2010-08-20 14:35:10 +02004102 if (length != resp_size) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103		dev_err(DEV, "AuthResponse payload of unexpected size\n");
4104 rv = 0;
4105 goto fail;
4106 }
4107
4108	rv = drbd_recv(mdev, response, resp_size);
4109
4110 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004111 if (!signal_pending(current))
4112 dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113 rv = 0;
4114 goto fail;
4115 }
4116
4117 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004118 if (right_response == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004119 dev_err(DEV, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004120 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121 goto fail;
4122 }
4123
4124 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4125
4126 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4127 if (rv) {
4128 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004129 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004130 goto fail;
4131 }
4132
4133 rv = !memcmp(response, right_response, resp_size);
4134
4135 if (rv)
4136 dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
Philipp Reisner89e58e72011-01-19 13:12:45 +01004137 resp_size, mdev->tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004138 else
4139 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140
4141 fail:
4142 kfree(peers_ch);
4143 kfree(response);
4144 kfree(right_response);
4145
4146 return rv;
4147}
4148#endif
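
/*
 * Illustrative sketch only, not part of the driver: the symmetric
 * challenge-response exchange performed by drbd_do_auth() above.
 * hmac(x) is a hypothetical shorthand for the cram-hmac-alg digest of x
 * keyed with the shared secret; send()/recv() stand for the
 * drbd_send_cmd2()/drbd_recv() calls used in the real code.
 */
#if 0
	get_random_bytes(my_challenge, CHALLENGE_LEN);
	send(P_AUTH_CHALLENGE, my_challenge);		/* 1. challenge the peer       */
	recv(P_AUTH_CHALLENGE, peers_ch);		/* 2. peer challenges us       */
	send(P_AUTH_RESPONSE, hmac(peers_ch));		/* 3. prove we know the secret */
	recv(P_AUTH_RESPONSE, response);		/* 4. peer's proof arrives     */
	rv = memcmp(response, hmac(my_challenge), resp_size) ? -1 : 1;
#endif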
4149
4150int drbdd_init(struct drbd_thread *thi)
4151{
4152 struct drbd_conf *mdev = thi->mdev;
4153 unsigned int minor = mdev_to_minor(mdev);
4154 int h;
4155
4156 sprintf(current->comm, "drbd%d_receiver", minor);
4157
4158 dev_info(DEV, "receiver (re)started\n");
4159
4160 do {
4161 h = drbd_connect(mdev);
4162 if (h == 0) {
4163 drbd_disconnect(mdev);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004164 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165 }
4166 if (h == -1) {
4167 dev_warn(DEV, "Discarding network configuration.\n");
4168 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4169 }
4170 } while (h == 0);
4171
4172 if (h > 0) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004173 if (get_net_conf(mdev->tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004174 drbdd(mdev);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004175 put_net_conf(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004176 }
4177 }
4178
4179 drbd_disconnect(mdev);
4180
4181 dev_info(DEV, "receiver terminated\n");
4182 return 0;
4183}
4184
4185/* ********* acknowledge sender ******** */
4186
Philipp Reisner0b70a132010-08-20 13:36:10 +02004187static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188{
4189 struct p_req_state_reply *p = (struct p_req_state_reply *)h;
4190
4191 int retcode = be32_to_cpu(p->retcode);
4192
4193 if (retcode >= SS_SUCCESS) {
4194 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4195 } else {
4196 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4197 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4198 drbd_set_st_err_str(retcode), retcode);
4199 }
4200 wake_up(&mdev->state_wait);
4201
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004202 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203}
4204
Philipp Reisner0b70a132010-08-20 13:36:10 +02004205static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004206{
4207 return drbd_send_ping_ack(mdev);
4208
4209}
4210
Philipp Reisner0b70a132010-08-20 13:36:10 +02004211static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212{
4213 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004214 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004215 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4216 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004217
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004218 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004219}
4220
Philipp Reisner0b70a132010-08-20 13:36:10 +02004221static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004222{
4223 struct p_block_ack *p = (struct p_block_ack *)h;
4224 sector_t sector = be64_to_cpu(p->sector);
4225 int blksize = be32_to_cpu(p->blksize);
4226
Philipp Reisner31890f42011-01-19 14:12:51 +01004227 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228
4229 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4230
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004231 if (get_ldev(mdev)) {
4232 drbd_rs_complete_io(mdev, sector);
4233 drbd_set_in_sync(mdev, sector, blksize);
4234 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4235 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4236 put_ldev(mdev);
4237 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004238 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004239 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004241 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242}
4243
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004244static int
4245validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4246 struct rb_root *root, const char *func,
4247 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248{
4249 struct drbd_request *req;
4250 struct bio_and_error m;
4251
Philipp Reisner87eeee42011-01-19 14:16:30 +01004252 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004253 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004255 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004256 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 }
4258 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004259 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260
4261 if (m.bio)
4262 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004263 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264}
4265
Philipp Reisner0b70a132010-08-20 13:36:10 +02004266static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267{
4268 struct p_block_ack *p = (struct p_block_ack *)h;
4269 sector_t sector = be64_to_cpu(p->sector);
4270 int blksize = be32_to_cpu(p->blksize);
4271 enum drbd_req_event what;
4272
4273 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4274
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004275 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276 drbd_set_in_sync(mdev, sector, blksize);
4277 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004278 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279 }
4280 switch (be16_to_cpu(h->command)) {
4281 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004282 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004283 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004284 break;
4285 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004286 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004287 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288 break;
4289 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004290 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004291 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292 break;
4293 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004294 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004295 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 break;
4297 default:
4298 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004299 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004300 }
4301
4302 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004303 &mdev->write_requests, __func__,
4304 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305}
4306
Philipp Reisner0b70a132010-08-20 13:36:10 +02004307static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308{
4309 struct p_block_ack *p = (struct p_block_ack *)h;
4310 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004311 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004312 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4313 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004314 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315
4316 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4317
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004318 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319 dec_rs_pending(mdev);
4320 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004321 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004323
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004324 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004325 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004326 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004327 if (!found) {
4328 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4329 The master bio might already be completed, therefore the
4330		   request is no longer in the write_requests tree. */
4331 /* In Protocol B we might already have got a P_RECV_ACK
4332 but then get a P_NEG_ACK afterwards. */
4333 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004334 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004335 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004336 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004337 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338}
4339
Philipp Reisner0b70a132010-08-20 13:36:10 +02004340static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341{
4342 struct p_block_ack *p = (struct p_block_ack *)h;
4343 sector_t sector = be64_to_cpu(p->sector);
4344
4345 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4346 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4347 (unsigned long long)sector, be32_to_cpu(p->blksize));
4348
4349 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004350 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004351 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352}
4353
Philipp Reisner0b70a132010-08-20 13:36:10 +02004354static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355{
4356 sector_t sector;
4357 int size;
4358 struct p_block_ack *p = (struct p_block_ack *)h;
4359
4360 sector = be64_to_cpu(p->sector);
4361 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362
4363 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4364
4365 dec_rs_pending(mdev);
4366
4367 if (get_ldev_if_state(mdev, D_FAILED)) {
4368 drbd_rs_complete_io(mdev, sector);
Philipp Reisnerd612d302010-12-27 10:53:28 +01004369 switch (be16_to_cpu(h->command)) {
4370 case P_NEG_RS_DREPLY:
4371 drbd_rs_failed_io(mdev, sector, size);
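			/* fall through */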
4372 case P_RS_CANCEL:
4373 break;
4374 default:
4375 D_ASSERT(0);
4376 put_ldev(mdev);
4377 return false;
4378 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004379 put_ldev(mdev);
4380 }
4381
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004382 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383}
4384
Philipp Reisner0b70a132010-08-20 13:36:10 +02004385static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004386{
4387 struct p_barrier_ack *p = (struct p_barrier_ack *)h;
4388
4389 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4390
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004391 if (mdev->state.conn == C_AHEAD &&
4392 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004393 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4394 mdev->start_resync_timer.expires = jiffies + HZ;
4395 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004396 }
4397
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004398 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399}
4400
Philipp Reisner0b70a132010-08-20 13:36:10 +02004401static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402{
4403 struct p_block_ack *p = (struct p_block_ack *)h;
4404 struct drbd_work *w;
4405 sector_t sector;
4406 int size;
4407
4408 sector = be64_to_cpu(p->sector);
4409 size = be32_to_cpu(p->blksize);
4410
4411 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4412
4413 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4414 drbd_ov_oos_found(mdev, sector, size);
4415 else
4416 ov_oos_print(mdev);
4417
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004418 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004419 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004420
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421 drbd_rs_complete_io(mdev, sector);
4422 dec_rs_pending(mdev);
4423
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004424 --mdev->ov_left;
4425
4426 /* let's advance progress step marks only for every other megabyte */
4427 if ((mdev->ov_left & 0x200) == 0x200)
4428 drbd_advance_rs_marks(mdev, mdev->ov_left);
4429
4430 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431 w = kmalloc(sizeof(*w), GFP_NOIO);
4432 if (w) {
4433 w->cb = w_ov_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004434 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435 } else {
4436 dev_err(DEV, "kmalloc(w) failed.");
4437 ov_oos_print(mdev);
4438 drbd_resync_finished(mdev);
4439 }
4440 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004441 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004442 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443}
4444
Philipp Reisner02918be2010-08-20 14:35:10 +02004445static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004446{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004447 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004448}
4449
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450struct asender_cmd {
4451 size_t pkt_size;
Philipp Reisner0b70a132010-08-20 13:36:10 +02004452 int (*process)(struct drbd_conf *mdev, struct p_header80 *h);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453};
4454
4455static struct asender_cmd *get_asender_cmd(int cmd)
4456{
4457 static struct asender_cmd asender_tbl[] = {
4458 /* anything missing from this table is in
4459 * the drbd_cmd_handler (drbd_default_handler) table,
4460 * see the beginning of drbdd() */
Philipp Reisner0b70a132010-08-20 13:36:10 +02004461 [P_PING] = { sizeof(struct p_header80), got_Ping },
4462 [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004463 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4464 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4465 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4466 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4467 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4468 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4469 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4470 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4471 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4472 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4473 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004474 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004475 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004476 [P_MAX_CMD] = { 0, NULL },
4477 };
4478 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4479 return NULL;
4480 return &asender_tbl[cmd];
4481}
4482
4483int drbd_asender(struct drbd_thread *thi)
4484{
4485 struct drbd_conf *mdev = thi->mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004486 struct p_header80 *h = &mdev->tconn->meta.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487 struct asender_cmd *cmd = NULL;
4488
4489 int rv, len;
4490 void *buf = h;
4491 int received = 0;
Philipp Reisner0b70a132010-08-20 13:36:10 +02004492 int expect = sizeof(struct p_header80);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493 int empty;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004494 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004495
4496 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4497
4498 current->policy = SCHED_RR; /* Make this a realtime task! */
4499 current->rt_priority = 2; /* more important than all other tasks */
4500
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004501 while (get_t_state(thi) == RUNNING) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502 drbd_thread_current_set_cpu(mdev);
4503 if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004504 if (!drbd_send_ping(mdev)) {
4505 dev_err(DEV, "drbd_send_ping has failed\n");
4506 goto reconnect;
4507 }
Philipp Reisnere42325a2011-01-19 13:55:45 +01004508 mdev->tconn->meta.socket->sk->sk_rcvtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +01004509 mdev->tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004510 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511 }
4512
4513 /* conditionally cork;
4514 * it may hurt latency if we cork without much to send */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004515 if (!mdev->tconn->net_conf->no_cork &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516 3 < atomic_read(&mdev->unacked_cnt))
Philipp Reisnere42325a2011-01-19 13:55:45 +01004517 drbd_tcp_cork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518 while (1) {
4519 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4520 flush_signals(current);
Lars Ellenberg0f8488e2010-10-13 18:19:23 +02004521 if (!drbd_process_done_ee(mdev))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004522 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523 /* to avoid race with newly queued ACKs */
4524 set_bit(SIGNAL_ASENDER, &mdev->flags);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004525 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526 empty = list_empty(&mdev->done_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004527 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528 /* new ack may have been queued right here,
4529 * but then there is also a signal pending,
4530 * and we start over... */
4531 if (empty)
4532 break;
4533 }
4534 /* but unconditionally uncork unless disabled */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004535 if (!mdev->tconn->net_conf->no_cork)
Philipp Reisnere42325a2011-01-19 13:55:45 +01004536 drbd_tcp_uncork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537
4538 /* short circuit, recv_msg would return EINTR anyways. */
4539 if (signal_pending(current))
4540 continue;
4541
Philipp Reisnere42325a2011-01-19 13:55:45 +01004542 rv = drbd_recv_short(mdev, mdev->tconn->meta.socket,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543 buf, expect-received, 0);
4544 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4545
4546 flush_signals(current);
4547
4548 /* Note:
4549 * -EINTR (on meta) we got a signal
4550 * -EAGAIN (on meta) rcvtimeo expired
4551 * -ECONNRESET other side closed the connection
4552 * -ERESTARTSYS (on data) we got a signal
4553 * rv < 0 other than above: unexpected error!
4554 * rv == expected: full header or command
4555 * rv < expected: "woken" by signal during receive
4556 * rv == 0 : "connection shut down by peer"
4557 */
4558 if (likely(rv > 0)) {
4559 received += rv;
4560 buf += rv;
4561 } else if (rv == 0) {
4562 dev_err(DEV, "meta connection shut down by peer.\n");
4563 goto reconnect;
4564 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004565 /* If the data socket received something meanwhile,
4566 * that is good enough: peer is still alive. */
Philipp Reisner31890f42011-01-19 14:12:51 +01004567 if (time_after(mdev->tconn->last_received,
Philipp Reisnere42325a2011-01-19 13:55:45 +01004568 jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004569 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004570 if (ping_timeout_active) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004571 dev_err(DEV, "PingAck did not arrive in time.\n");
4572 goto reconnect;
4573 }
4574 set_bit(SEND_PING, &mdev->flags);
4575 continue;
4576 } else if (rv == -EINTR) {
4577 continue;
4578 } else {
4579 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
4580 goto reconnect;
4581 }
4582
4583 if (received == expect && cmd == NULL) {
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +01004584 if (unlikely(h->magic != cpu_to_be32(DRBD_MAGIC))) {
Lars Ellenberg004352f2010-10-05 20:13:58 +02004585 dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
4586 be32_to_cpu(h->magic),
4587 be16_to_cpu(h->command),
4588 be16_to_cpu(h->length));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004589 goto reconnect;
4590 }
4591 cmd = get_asender_cmd(be16_to_cpu(h->command));
4592 len = be16_to_cpu(h->length);
4593 if (unlikely(cmd == NULL)) {
Lars Ellenberg004352f2010-10-05 20:13:58 +02004594 dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
4595 be32_to_cpu(h->magic),
4596 be16_to_cpu(h->command),
4597 be16_to_cpu(h->length));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598 goto disconnect;
4599 }
4600 expect = cmd->pkt_size;
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004601 if (!expect(len == expect - sizeof(struct p_header80)))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004603 }
4604 if (received == expect) {
Philipp Reisner31890f42011-01-19 14:12:51 +01004605 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004606 D_ASSERT(cmd != NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004607 if (!cmd->process(mdev, h))
4608 goto reconnect;
4609
Lars Ellenbergf36af182011-03-09 22:44:55 +01004610 /* the idle_timeout (ping-int)
4611 * has been restored in got_PingAck() */
4612 if (cmd == get_asender_cmd(P_PING_ACK))
4613 ping_timeout_active = 0;
4614
Philipp Reisnerb411b362009-09-25 16:07:19 -07004615 buf = h;
4616 received = 0;
Philipp Reisner0b70a132010-08-20 13:36:10 +02004617 expect = sizeof(struct p_header80);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618 cmd = NULL;
4619 }
4620 }
4621
4622 if (0) {
4623reconnect:
4624 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004625 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004626 }
4627 if (0) {
4628disconnect:
4629 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004630 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004631 }
4632 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4633
4634 D_ASSERT(mdev->state.conn < C_CONNECTED);
4635 dev_info(DEV, "asender terminated\n");
4636
4637 return 0;
4638}