/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_handshake(struct drbd_conf *mdev);
static int drbd_do_auth(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

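/* Grab up to @number pages from the pre-allocated drbd_pp_pool if enough are
 * vacant, otherwise try to allocate fresh pages with GFP_TRY; a partially
 * allocated chain is handed back to the pool and NULL is returned. */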
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that is not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

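/* Move finished entries off net_ee under the req_lock, then free them
 * (and their page chains) outside the lock. */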
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

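/* Release a peer request: free an attached digest, return its page chain
 * to the pool (or the system), and give the request back to the mempool. */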
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}


/*
 * This function is called from _asender only_
 * but see also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}

void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* see also kernel_accept, which is only present since 2.6.18.
 * also we want to log exactly which part of it failed */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;

out:
	return err;
}

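/* One-shot receive on an arbitrary socket, without the retry loop and the
 * connection-state handling of drbd_recv(). */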
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

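/* Receive exactly @size bytes from the data socket; on a short read or an
 * error the connection is forced to C_BROKEN_PIPE. */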
static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				dev_info(DEV, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				dev_err(DEV, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			dev_info(DEV, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));

	return rv;
}

/* quoting tcp(7):
 * On individual connections, the socket buffer size must be set prior to the
 * listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

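/* Actively set up a socket towards the peer, bound to the locally
 * configured address; returns NULL if the connect attempt failed. */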
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(tconn);
	return sock;
}

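/* Passive side of connection setup: listen on the configured local address
 * and accept one incoming connection, with a randomly jittered timeout. */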
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
				  (struct sockaddr *) tconn->net_conf->my_addr,
				  tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
		}
	}
	put_net_conf(tconn);

	return s_estab;
}

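/* Send the initial packet announcing what this socket will be used for. */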
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}

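/* Receive and decode the peer's initial packet; returns 0xffff on garbage. */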
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_conf *mdev)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	D_ASSERT(!mdev->tconn->data.socket);

	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
	mdev->tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(mdev->tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock) {
				drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				dev_err(DEV, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(mdev->tconn);
		if (s) {
			try = drbd_recv_fp(mdev->tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					dev_warn(DEV, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					dev_warn(DEV, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
				break;
			default:
				dev_warn(DEV, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (mdev->state.conn <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&mdev->tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	mdev->tconn->data.socket = sock;
	mdev->tconn->meta.socket = msock;
	mdev->tconn->last_received = jiffies;

	D_ASSERT(mdev->tconn->asender.task == NULL);

	h = drbd_do_handshake(mdev);
	if (h <= 0)
		return h;

	if (mdev->tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(mdev)) {
		case -1:
			dev_err(DEV, "Authentication of peer failed\n");
			return -1;
		case 0:
			dev_err(DEV, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	drbd_thread_start(&mdev->tconn->asender);

	if (drbd_send_protocol(mdev) == -1)
		return -1;
	drbd_send_sync_param(mdev, &mdev->sync_conf);
	drbd_send_sizes(mdev, 0, 0);
	drbd_send_uuids(mdev);
	drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */

	return 1;

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}

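/* Decode an on-the-wire packet header, accepting both the old h80 and the
 * new h95 layout, depending on the magic. */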
static bool decode_header(struct drbd_conf *mdev, struct p_header *h,
			  enum drbd_packet *cmd, unsigned int *packet_size)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		*cmd = be16_to_cpu(h->h80.command);
		*packet_size = be16_to_cpu(h->h80.length);
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		*cmd = be16_to_cpu(h->h95.command);
		*packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff;
	} else {
		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
			be32_to_cpu(h->h80.magic),
			be16_to_cpu(h->h80.command),
			be16_to_cpu(h->h80.length));
		return false;
	}
	return true;
}

static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd,
			    unsigned int *packet_size)
{
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	int r;

	r = drbd_recv(mdev, h, sizeof(*h));
	if (unlikely(r != sizeof(*h))) {
		if (!signal_pending(current))
			dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
		return false;
	}

	r = decode_header(mdev, h, cmd, packet_size);
	mdev->tconn->last_received = jiffies;

	return r;
}

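/* Flush the local backing device if the current write ordering policy asks
 * for it; on failure fall back to draining I/O instead of flushing. */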
static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}

/**
 * drbd_submit_ee()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		   const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_endio_sec;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&mdev->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

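/* Handle a P_BARRIER packet: close the current epoch according to the
 * configured write ordering and start a new one. */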
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
					rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
{
	struct page *page;
	int rr, rv = 1;
	void *data;

	if (!data_size)
		return true;

	page = drbd_pp_alloc(mdev, 1, 1);

	data = kmap(page);
	while (data_size) {
		rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
		if (rr != min_t(int, data_size, PAGE_SIZE)) {
			rv = 0;
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data: read %d expected %d\n",
					rr, min_t(int, data_size, PAGE_SIZE));
			break;
		}
		data_size -= rr;
	}
	kunmap(page);
	drbd_pp_free(mdev, page, 0);
	return rv;
}

1379static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1380 sector_t sector, int data_size)
1381{
1382 struct bio_vec *bvec;
1383 struct bio *bio;
1384 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001385 void *dig_in = mdev->tconn->int_dig_in;
1386 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387
Philipp Reisnera0638452011-01-19 14:31:32 +01001388 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1389 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001390
1391 if (dgs) {
1392 rr = drbd_recv(mdev, dig_in, dgs);
1393 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001394 if (!signal_pending(current))
1395 dev_warn(DEV,
1396 "short read receiving data reply digest: read %d expected %d\n",
1397 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001398 return 0;
1399 }
1400 }
1401
1402 data_size -= dgs;
1403
1404 /* optimistically update recv_cnt. if receiving fails below,
1405 * we disconnect anyways, and counters will be reset. */
1406 mdev->recv_cnt += data_size>>9;
1407
1408 bio = req->master_bio;
1409 D_ASSERT(sector == bio->bi_sector);
1410
1411 bio_for_each_segment(bvec, bio, i) {
1412 expect = min_t(int, data_size, bvec->bv_len);
1413 rr = drbd_recv(mdev,
1414 kmap(bvec->bv_page)+bvec->bv_offset,
1415 expect);
1416 kunmap(bvec->bv_page);
1417 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001418 if (!signal_pending(current))
1419 dev_warn(DEV, "short read receiving data reply: "
1420 "read %d expected %d\n",
1421 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001422 return 0;
1423 }
1424 data_size -= rr;
1425 }
1426
1427 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001428 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001429 if (memcmp(dig_in, dig_vv, dgs)) {
1430 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1431 return 0;
1432 }
1433 }
1434
1435 D_ASSERT(data_size == 0);
1436 return 1;
1437}
1438
1439/* e_end_resync_block() is called via
1440 * drbd_process_done_ee() by asender only */
1441static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1442{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001443 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1444 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445 int ok;
1446
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001447 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001448
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001449 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1450 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1451 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 } else {
1453 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001454 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001456 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457 }
1458 dec_unacked(mdev);
1459
1460 return ok;
1461}
1462
1463static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1464{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001465 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001467 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1468 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001469 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470
1471 dec_rs_pending(mdev);
1472
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 inc_unacked(mdev);
1474 /* corresponding dec_unacked() in e_end_resync_block()
	1475	 * or in _drbd_clear_done_ee */
1476
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001477 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001478
Philipp Reisner87eeee42011-01-19 14:16:30 +01001479 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001480 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001481 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001483 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001484 if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001485 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001487 /* don't care for the reason here */
1488 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001489 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001490 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001491 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001492
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001494fail:
1495 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001496 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497}
1498
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001499static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001500find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1501 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001502{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001503 struct drbd_request *req;
1504
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001505	/* Our peer echoes back the request pointer we sent as block_id;
	 * validate it against the interval tree before trusting it. */
1506 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001507 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001508 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001509 if (!missing_ok) {
1510 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1511 (unsigned long)id, (unsigned long long)sector);
1512 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001513 return NULL;
1514}
1515
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001516static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1517 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518{
1519 struct drbd_request *req;
1520 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001522 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523
1524 sector = be64_to_cpu(p->sector);
1525
Philipp Reisner87eeee42011-01-19 14:16:30 +01001526 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001527 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001528 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001529 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001530 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531
Bart Van Assche24c48302011-05-21 18:32:29 +02001532 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533 * special casing it there for the various failure cases.
1534 * still no race with drbd_fail_pending_reads */
1535 ok = recv_dless_read(mdev, req, sector, data_size);
1536
1537 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001538 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539 /* else: nothing. handled from drbd_disconnect...
1540 * I don't think we may complete this just yet
1541 * in case we are "on-disconnect: freeze" */
1542
1543 return ok;
1544}
1545
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001546static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1547 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548{
1549 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001550 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001551 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552
1553 sector = be64_to_cpu(p->sector);
1554 D_ASSERT(p->block_id == ID_SYNCER);
1555
1556 if (get_ldev(mdev)) {
1557 /* data is submitted to disk within recv_resync_read.
1558 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001559 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560 ok = recv_resync_read(mdev, sector, data_size);
1561 } else {
1562 if (__ratelimit(&drbd_ratelimit_state))
1563 dev_err(DEV, "Can not write resync data to local disk.\n");
1564
1565 ok = drbd_drain_block(mdev, data_size);
1566
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001567 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568 }
1569
Philipp Reisner778f2712010-07-06 11:14:00 +02001570 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1571
Philipp Reisnerb411b362009-09-25 16:07:19 -07001572 return ok;
1573}
1574
1575/* e_end_block() is called via drbd_process_done_ee().
1576 * this means this function only runs in the asender thread
1577 */
1578static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1579{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001580 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1581 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001582 int ok = 1, pcmd;
1583
Philipp Reisner89e58e72011-01-19 13:12:45 +01001584 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001585 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001586 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1587 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001588 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001590 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001592 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001593 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001594 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595 /* we expect it to be marked out of sync anyways...
1596 * maybe assert this? */
1597 }
1598 dec_unacked(mdev);
1599 }
1600 /* we delete from the conflict detection hash _after_ we sent out the
1601 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001602 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001603 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001604 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1605 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001606 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001607 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001608 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001610 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001611
1612 return ok;
1613}
1614
1615static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1616{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001617 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618 int ok = 1;
1619
Philipp Reisner89e58e72011-01-19 13:12:45 +01001620 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001621 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001622
Philipp Reisner87eeee42011-01-19 14:16:30 +01001623 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001624 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1625 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001626 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627
1628 dec_unacked(mdev);
1629
1630 return ok;
1631}
1632
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001633static bool seq_greater(u32 a, u32 b)
1634{
1635 /*
1636 * We assume 32-bit wrap-around here.
1637 * For 24-bit wrap-around, we would have to shift:
1638 * a <<= 8; b <<= 8;
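	 *
	 * Illustration: seq_greater(5, 0xfffffffe) evaluates
	 * (s32)5 - (s32)0xfffffffe == 7 > 0, so 5 counts as newer
	 * even though the counter has just wrapped around.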
1639 */
1640 return (s32)a - (s32)b > 0;
1641}
1642
1643static u32 seq_max(u32 a, u32 b)
1644{
1645 return seq_greater(a, b) ? a : b;
1646}
1647
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001648static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001649{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001650 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001651
1652 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001653 old_peer_seq = mdev->peer_seq;
1654 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001655 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001656 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001657 wake_up(&mdev->seq_wait);
1658}
1659
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660/* Called from receive_Data.
1661 * Synchronize packets on sock with packets on msock.
1662 *
1663 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1664 * packet traveling on msock, they are still processed in the order they have
1665 * been sent.
1666 *
1667 * Note: we don't care for Ack packets overtaking P_DATA packets.
1668 *
1669 * In case packet_seq is larger than mdev->peer_seq number, there are
1670 * outstanding packets on the msock. We wait for them to arrive.
1671 * In case we are the logically next packet, we update mdev->peer_seq
1672 * ourselves. Correctly handles 32bit wrap around.
1673 *
1674 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1675 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1676 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
	1677	 * 1<<11 == 2048 seconds (1<<32 / 1<<21) aka ages for the 32bit wrap around...
1678 *
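 * Example: a P_DATA packet arrives on the data socket carrying seq_num 42
 * while the packet carrying seq 41 is still in flight on the msock; we sleep
 * here until peer_seq has caught up to 41 (or force a reconnect after the
 * 30 second timeout below) and only then process the write.
 *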
1679 * returns 0 if we may process the packet,
1680 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1681static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1682{
1683 DEFINE_WAIT(wait);
1684 unsigned int p_seq;
1685 long timeout;
1686 int ret = 0;
1687 spin_lock(&mdev->peer_seq_lock);
1688 for (;;) {
1689 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001690 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691 break;
1692 if (signal_pending(current)) {
1693 ret = -ERESTARTSYS;
1694 break;
1695 }
1696 p_seq = mdev->peer_seq;
1697 spin_unlock(&mdev->peer_seq_lock);
1698 timeout = schedule_timeout(30*HZ);
1699 spin_lock(&mdev->peer_seq_lock);
1700 if (timeout == 0 && p_seq == mdev->peer_seq) {
1701 ret = -ETIMEDOUT;
1702 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1703 break;
1704 }
1705 }
1706 finish_wait(&mdev->seq_wait, &wait);
1707 if (mdev->peer_seq+1 == packet_seq)
1708 mdev->peer_seq++;
1709 spin_unlock(&mdev->peer_seq_lock);
1710 return ret;
1711}
1712
Lars Ellenberg688593c2010-11-17 22:25:03 +01001713/* see also bio_flags_to_wire()
1714 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1715 * flags and back. We may replicate to other kernel versions. */
1716static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001717{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001718 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1719 (dpf & DP_FUA ? REQ_FUA : 0) |
1720 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1721 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001722}
1723
Philipp Reisnerb411b362009-09-25 16:07:19 -07001724/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001725static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1726 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001727{
1728 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001729 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001730 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731 int rw = WRITE;
1732 u32 dp_flags;
1733
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001735 spin_lock(&mdev->peer_seq_lock);
1736 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1737 mdev->peer_seq++;
1738 spin_unlock(&mdev->peer_seq_lock);
1739
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001740 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741 atomic_inc(&mdev->current_epoch->epoch_size);
1742 return drbd_drain_block(mdev, data_size);
1743 }
1744
1745 /* get_ldev(mdev) successful.
1746 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001747 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001748 * the end of this function. */
1749
1750 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001751 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1752 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001754 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001755 }
1756
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001757 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758
Lars Ellenberg688593c2010-11-17 22:25:03 +01001759 dp_flags = be32_to_cpu(p->dp_flags);
1760 rw |= wire_flags_to_bio(mdev, dp_flags);
1761
1762 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001763 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001764
Philipp Reisnerb411b362009-09-25 16:07:19 -07001765 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001766 peer_req->epoch = mdev->current_epoch;
1767 atomic_inc(&peer_req->epoch->epoch_size);
1768 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769 spin_unlock(&mdev->epoch_lock);
1770
Philipp Reisnerb411b362009-09-25 16:07:19 -07001771 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001772 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001773 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774 } else {
1775 /* don't get the req_lock yet,
1776 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001777 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001778 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 int first;
1781
Philipp Reisner89e58e72011-01-19 13:12:45 +01001782 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783
1784 /* conflict detection and handling:
1785 * 1. wait on the sequence number,
1786 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001787 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 *
1789 * Note: for two_primaries, we are protocol C,
1790 * so there cannot be any request that is DONE
1791 * but still on the transfer log.
1792 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793 * if no conflicting request is found:
1794 * submit.
1795 *
1796 * if any conflicting request is found
1797 * that has not yet been acked,
1798 * AND I have the "discard concurrent writes" flag:
1799 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1800 *
1801 * if any conflicting request is found:
1802 * block the receiver, waiting on misc_wait
1803 * until no more conflicting requests are there,
1804 * or we get interrupted (disconnect).
1805 *
1806 * we do not just write after local io completion of those
1807 * requests, but only after req is done completely, i.e.
1808 * we wait for the P_DISCARD_ACK to arrive!
1809 *
1810 * then proceed normally, i.e. submit.
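		 *
		 * Illustrative scenario: both primaries write the same sector at
		 * nearly the same time.  The node that has DISCARD_CONCURRENT set
		 * (decided during the handshake) answers the peer's data with
		 * P_DISCARD_ACK instead of writing it; the other node blocks below
		 * until its own conflicting request is done and then submits the
		 * peer's data, so both nodes converge on identical block contents.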
1811 */
1812 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1813 goto out_interrupted;
1814
Philipp Reisner87eeee42011-01-19 14:16:30 +01001815 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816
Philipp Reisnerb411b362009-09-25 16:07:19 -07001817 first = 1;
1818 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001819 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 int have_unacked = 0;
1821 int have_conflict = 0;
1822 prepare_to_wait(&mdev->misc_wait, &wait,
1823 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001824
1825 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1826 if (i) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001827 /* only ALERT on first iteration,
1828 * we may be woken up early... */
1829 if (first)
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001830 dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001831 " new: %llus +%u; pending: %llus +%u\n",
1832 current->comm, current->pid,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001833 i->local ? "local" : "remote",
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001834 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001835 (unsigned long long)i->sector, i->size);
1836
1837 if (i->local) {
1838 struct drbd_request *req2;
1839
1840 req2 = container_of(i, struct drbd_request, i);
1841 if (req2->rq_state & RQ_NET_PENDING)
1842 ++have_unacked;
1843 }
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001844 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001845 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001846 if (!have_conflict)
1847 break;
1848
1849 /* Discard Ack only for the _first_ iteration */
1850 if (first && discard && have_unacked) {
1851 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1852 (unsigned long long)sector);
1853 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001854 peer_req->w.cb = e_send_discard_ack;
1855 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001856
Philipp Reisner87eeee42011-01-19 14:16:30 +01001857 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001858
1859 /* we could probably send that P_DISCARD_ACK ourselves,
1860 * but I don't like the receiver using the msock */
1861
1862 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001863 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001864 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001865 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001866 }
1867
1868 if (signal_pending(current)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001869 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001870 finish_wait(&mdev->misc_wait, &wait);
1871 goto out_interrupted;
1872 }
1873
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001874 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001875 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001876
Philipp Reisner87eeee42011-01-19 14:16:30 +01001877 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001878 if (first) {
1879 first = 0;
1880 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1881 "sec=%llus\n", (unsigned long long)sector);
1882 } else if (discard) {
1883 /* we had none on the first iteration.
1884 * there must be none now. */
1885 D_ASSERT(have_unacked == 0);
1886 }
1887 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001888 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001889 }
1890 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001891
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001892 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001893 }
1894
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001895 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001896 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001897
Philipp Reisner89e58e72011-01-19 13:12:45 +01001898 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001899 case DRBD_PROT_C:
1900 inc_unacked(mdev);
1901 /* corresponding dec_unacked() in e_end_block()
	1902	 * or in _drbd_clear_done_ee */
1903 break;
1904 case DRBD_PROT_B:
1905 /* I really don't like it that the receiver thread
1906 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001907 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001908 break;
1909 case DRBD_PROT_A:
1910 /* nothing to do */
1911 break;
1912 }
1913
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001914 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001916 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1917 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1918 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1919 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920 }
1921
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001922 if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001923 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001925 /* don't care for the reason here */
1926 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001927 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001928 list_del(&peer_req->w.list);
1929 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001930 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001931 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1932 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001933
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001935 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001936 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001937 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001938 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939}
1940
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001941/* We may throttle resync, if the lower device seems to be busy,
1942 * and current sync rate is above c_min_rate.
1943 *
1944 * To decide whether or not the lower device is busy, we use a scheme similar
	1945	 * to MD RAID is_mddev_idle(): if the partition stats reveal a "significant"
	1946	 * amount (more than 64 sectors) of activity that we cannot account for with
	1947	 * our own resync activity, it obviously is "busy".
1948 *
1949 * The current sync rate used here uses only the most recent two step marks,
1950 * to have a short time average so we can react faster.
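 *
 * Worked example (numbers for illustration only): if the most recent mark is
 * dt = 3 seconds old and db = 7680 bitmap bits (4 KiB each) were resynced
 * since then, dbdt = Bit2KB(7680 / 3) = 10240 KiB/s.  With c_min_rate set to
 * 4096 that exceeds the minimum, so we report "throttle", provided the backing
 * device also showed the significant unaccounted activity described above.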
1951 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001952int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001953{
1954 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1955 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001956 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001957 int curr_events;
1958 int throttle = 0;
1959
1960 /* feature disabled? */
1961 if (mdev->sync_conf.c_min_rate == 0)
1962 return 0;
1963
Philipp Reisnere3555d82010-11-07 15:56:29 +01001964 spin_lock_irq(&mdev->al_lock);
1965 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1966 if (tmp) {
1967 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1968 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1969 spin_unlock_irq(&mdev->al_lock);
1970 return 0;
1971 }
1972 /* Do not slow down if app IO is already waiting for this extent */
1973 }
1974 spin_unlock_irq(&mdev->al_lock);
1975
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001976 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1977 (int)part_stat_read(&disk->part0, sectors[1]) -
1978 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001979
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001980 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1981 unsigned long rs_left;
1982 int i;
1983
1984 mdev->rs_last_events = curr_events;
1985
1986 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1987 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001988 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1989
1990 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
1991 rs_left = mdev->ov_left;
1992 else
1993 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001994
1995 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
1996 if (!dt)
1997 dt++;
1998 db = mdev->rs_mark_left[i] - rs_left;
1999 dbdt = Bit2KB(db/dt);
2000
2001 if (dbdt > mdev->sync_conf.c_min_rate)
2002 throttle = 1;
2003 }
2004 return throttle;
2005}
2006
2007
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002008static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2009 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002010{
2011 sector_t sector;
2012 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002013 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002015 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002016 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002017 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002018
2019 sector = be64_to_cpu(p->sector);
2020 size = be32_to_cpu(p->blksize);
2021
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002022 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2024 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002025 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 }
2027 if (sector + (size>>9) > capacity) {
2028 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2029 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002030 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 }
2032
2033 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002034 verb = 1;
2035 switch (cmd) {
2036 case P_DATA_REQUEST:
2037 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2038 break;
2039 case P_RS_DATA_REQUEST:
2040 case P_CSUM_RS_REQUEST:
2041 case P_OV_REQUEST:
2042 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2043 break;
2044 case P_OV_REPLY:
2045 verb = 0;
2046 dec_rs_pending(mdev);
2047 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2048 break;
2049 default:
2050 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2051 cmdname(cmd));
2052 }
2053 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054 dev_err(DEV, "Can not satisfy peer's read request, "
2055 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002056
Lars Ellenberga821cc42010-09-06 12:31:37 +02002057 /* drain possibly payload */
2058 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059 }
2060
2061 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2062 * "criss-cross" setup, that might cause write-out on some other DRBD,
2063 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002064 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2065 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002067 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002068 }
2069
Philipp Reisner02918be2010-08-20 14:35:10 +02002070 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002072 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002073 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002074 /* application IO, don't drbd_rs_begin_io */
2075 goto submit;
2076
Philipp Reisnerb411b362009-09-25 16:07:19 -07002077 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002078 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002079 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002080 /* used in the sector offset progress display */
2081 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 break;
2083
2084 case P_OV_REPLY:
2085 case P_CSUM_RS_REQUEST:
2086 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002087 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2088 if (!di)
2089 goto out_free_e;
2090
2091 di->digest_size = digest_size;
2092 di->digest = (((char *)di)+sizeof(struct digest_info));
2093
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002094 peer_req->digest = di;
2095 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002096
Philipp Reisnerb411b362009-09-25 16:07:19 -07002097 if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
2098 goto out_free_e;
2099
Philipp Reisner02918be2010-08-20 14:35:10 +02002100 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002101 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002102 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002103 /* used in the sector offset progress display */
2104 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002105 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002106 /* track progress, we may need to throttle */
2107 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002108 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002109 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002110 /* drbd_rs_begin_io done when we sent this request,
2111 * but accounting still needs to be done. */
2112 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002113 }
2114 break;
2115
2116 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002118 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002119 unsigned long now = jiffies;
2120 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002121 mdev->ov_start_sector = sector;
2122 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002123 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2124 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002125 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2126 mdev->rs_mark_left[i] = mdev->ov_left;
2127 mdev->rs_mark_time[i] = now;
2128 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002129 dev_info(DEV, "Online Verify start sector: %llu\n",
2130 (unsigned long long)sector);
2131 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002132 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002133 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002134 break;
2135
Philipp Reisnerb411b362009-09-25 16:07:19 -07002136 default:
2137 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002138 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002139 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002140 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 }
2142
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002143 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2144 * wrt the receiver, but it is not as straightforward as it may seem.
2145 * Various places in the resync start and stop logic assume resync
2146 * requests are processed in order, requeuing this on the worker thread
2147 * introduces a bunch of new code for synchronization between threads.
2148 *
2149 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2150 * "forever", throttling after drbd_rs_begin_io will lock that extent
2151 * for application writes for the same time. For now, just throttle
2152 * here, where the rest of the code expects the receiver to sleep for
2153 * a while, anyways.
2154 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002155
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002156 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2157 * this defers syncer requests for some time, before letting at least
	2158	 * one request through. The resync controller on the receiving side
2159 * will adapt to the incoming rate accordingly.
2160 *
2161 * We cannot throttle here if remote is Primary/SyncTarget:
2162 * we would also throttle its application reads.
2163 * In that case, throttling is done on the SyncTarget only.
2164 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002165 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2166 schedule_timeout_uninterruptible(HZ/10);
2167 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002168 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002170submit_for_resync:
2171 atomic_add(size >> 9, &mdev->rs_sect_ev);
2172
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002173submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002175 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002176 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002177 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002179 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002180 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002182 /* don't care for the reason here */
2183 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002184 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002185 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002186 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002187 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2188
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002191 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002192 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193}
2194
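/* Split brain auto-recovery policy for "zero primaries" (after-sb-0pri).
 * Returns  1 if this node should become sync source,
 *         -1 if it should become sync target,
 *       -100 if the configured policy cannot decide (we will disconnect). */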
2195static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2196{
2197 int self, peer, rv = -100;
2198 unsigned long ch_self, ch_peer;
2199
2200 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2201 peer = mdev->p_uuid[UI_BITMAP] & 1;
2202
2203 ch_peer = mdev->p_uuid[UI_SIZE];
2204 ch_self = mdev->comm_bm_set;
2205
Philipp Reisner89e58e72011-01-19 13:12:45 +01002206 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002207 case ASB_CONSENSUS:
2208 case ASB_DISCARD_SECONDARY:
2209 case ASB_CALL_HELPER:
2210 dev_err(DEV, "Configuration error.\n");
2211 break;
2212 case ASB_DISCONNECT:
2213 break;
2214 case ASB_DISCARD_YOUNGER_PRI:
2215 if (self == 0 && peer == 1) {
2216 rv = -1;
2217 break;
2218 }
2219 if (self == 1 && peer == 0) {
2220 rv = 1;
2221 break;
2222 }
2223 /* Else fall through to one of the other strategies... */
2224 case ASB_DISCARD_OLDER_PRI:
2225 if (self == 0 && peer == 1) {
2226 rv = 1;
2227 break;
2228 }
2229 if (self == 1 && peer == 0) {
2230 rv = -1;
2231 break;
2232 }
2233 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002234 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002235 "Using discard-least-changes instead\n");
2236 case ASB_DISCARD_ZERO_CHG:
2237 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002238 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002239 ? -1 : 1;
2240 break;
2241 } else {
2242 if (ch_peer == 0) { rv = 1; break; }
2243 if (ch_self == 0) { rv = -1; break; }
2244 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002245 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002246 break;
2247 case ASB_DISCARD_LEAST_CHG:
2248 if (ch_self < ch_peer)
2249 rv = -1;
2250 else if (ch_self > ch_peer)
2251 rv = 1;
2252 else /* ( ch_self == ch_peer ) */
2253 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002254 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 ? -1 : 1;
2256 break;
2257 case ASB_DISCARD_LOCAL:
2258 rv = -1;
2259 break;
2260 case ASB_DISCARD_REMOTE:
2261 rv = 1;
2262 }
2263
2264 return rv;
2265}
2266
2267static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2268{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002269 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002270
Philipp Reisner89e58e72011-01-19 13:12:45 +01002271 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 case ASB_DISCARD_YOUNGER_PRI:
2273 case ASB_DISCARD_OLDER_PRI:
2274 case ASB_DISCARD_LEAST_CHG:
2275 case ASB_DISCARD_LOCAL:
2276 case ASB_DISCARD_REMOTE:
2277 dev_err(DEV, "Configuration error.\n");
2278 break;
2279 case ASB_DISCONNECT:
2280 break;
2281 case ASB_CONSENSUS:
2282 hg = drbd_asb_recover_0p(mdev);
2283 if (hg == -1 && mdev->state.role == R_SECONDARY)
2284 rv = hg;
2285 if (hg == 1 && mdev->state.role == R_PRIMARY)
2286 rv = hg;
2287 break;
2288 case ASB_VIOLENTLY:
2289 rv = drbd_asb_recover_0p(mdev);
2290 break;
2291 case ASB_DISCARD_SECONDARY:
2292 return mdev->state.role == R_PRIMARY ? 1 : -1;
2293 case ASB_CALL_HELPER:
2294 hg = drbd_asb_recover_0p(mdev);
2295 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002296 enum drbd_state_rv rv2;
2297
2298 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002299 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2300 * we might be here in C_WF_REPORT_PARAMS which is transient.
2301 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002302 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2303 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002304 drbd_khelper(mdev, "pri-lost-after-sb");
2305 } else {
2306 dev_warn(DEV, "Successfully gave up primary role.\n");
2307 rv = hg;
2308 }
2309 } else
2310 rv = hg;
2311 }
2312
2313 return rv;
2314}
2315
2316static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2317{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002318 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002319
Philipp Reisner89e58e72011-01-19 13:12:45 +01002320 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002321 case ASB_DISCARD_YOUNGER_PRI:
2322 case ASB_DISCARD_OLDER_PRI:
2323 case ASB_DISCARD_LEAST_CHG:
2324 case ASB_DISCARD_LOCAL:
2325 case ASB_DISCARD_REMOTE:
2326 case ASB_CONSENSUS:
2327 case ASB_DISCARD_SECONDARY:
2328 dev_err(DEV, "Configuration error.\n");
2329 break;
2330 case ASB_VIOLENTLY:
2331 rv = drbd_asb_recover_0p(mdev);
2332 break;
2333 case ASB_DISCONNECT:
2334 break;
2335 case ASB_CALL_HELPER:
2336 hg = drbd_asb_recover_0p(mdev);
2337 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002338 enum drbd_state_rv rv2;
2339
Philipp Reisnerb411b362009-09-25 16:07:19 -07002340 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2341 * we might be here in C_WF_REPORT_PARAMS which is transient.
2342 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002343 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2344 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345 drbd_khelper(mdev, "pri-lost-after-sb");
2346 } else {
2347 dev_warn(DEV, "Successfully gave up primary role.\n");
2348 rv = hg;
2349 }
2350 } else
2351 rv = hg;
2352 }
2353
2354 return rv;
2355}
2356
2357static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2358 u64 bits, u64 flags)
2359{
2360 if (!uuid) {
2361 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2362 return;
2363 }
2364 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2365 text,
2366 (unsigned long long)uuid[UI_CURRENT],
2367 (unsigned long long)uuid[UI_BITMAP],
2368 (unsigned long long)uuid[UI_HISTORY_START],
2369 (unsigned long long)uuid[UI_HISTORY_END],
2370 (unsigned long long)bits,
2371 (unsigned long long)flags);
2372}
2373
2374/*
2375 100 after split brain try auto recover
2376 2 C_SYNC_SOURCE set BitMap
2377 1 C_SYNC_SOURCE use BitMap
2378 0 no Sync
2379 -1 C_SYNC_TARGET use BitMap
2380 -2 C_SYNC_TARGET set BitMap
2381 -100 after split brain, disconnect
2382-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002383-1091 requires proto 91
2384-1096 requires proto 96
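      (values below -1000 encode "-1000 - <required protocol version>";
       drbd_sync_handshake() decodes and reports them)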
Philipp Reisnerb411b362009-09-25 16:07:19 -07002385 */
2386static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2387{
2388 u64 self, peer;
2389 int i, j;
2390
2391 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2392 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2393
2394 *rule_nr = 10;
2395 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2396 return 0;
2397
2398 *rule_nr = 20;
2399 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2400 peer != UUID_JUST_CREATED)
2401 return -2;
2402
2403 *rule_nr = 30;
2404 if (self != UUID_JUST_CREATED &&
2405 (peer == UUID_JUST_CREATED || peer == (u64)0))
2406 return 2;
2407
2408 if (self == peer) {
2409 int rct, dc; /* roles at crash time */
2410
2411 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2412
Philipp Reisner31890f42011-01-19 14:12:51 +01002413 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002414 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002415
2416 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2417 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2418 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2419 drbd_uuid_set_bm(mdev, 0UL);
2420
2421 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2422 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2423 *rule_nr = 34;
2424 } else {
2425 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2426 *rule_nr = 36;
2427 }
2428
2429 return 1;
2430 }
2431
2432 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2433
Philipp Reisner31890f42011-01-19 14:12:51 +01002434 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002435 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436
2437 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2438 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2439 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2440
2441 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2442 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2443 mdev->p_uuid[UI_BITMAP] = 0UL;
2444
2445 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2446 *rule_nr = 35;
2447 } else {
2448 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2449 *rule_nr = 37;
2450 }
2451
2452 return -1;
2453 }
2454
2455 /* Common power [off|failure] */
2456 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2457 (mdev->p_uuid[UI_FLAGS] & 2);
2458 /* lowest bit is set when we were primary,
2459 * next bit (weight 2) is set when peer was primary */
2460 *rule_nr = 40;
2461
2462 switch (rct) {
2463 case 0: /* !self_pri && !peer_pri */ return 0;
2464 case 1: /* self_pri && !peer_pri */ return 1;
2465 case 2: /* !self_pri && peer_pri */ return -1;
2466 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002467 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002468 return dc ? -1 : 1;
2469 }
2470 }
2471
2472 *rule_nr = 50;
2473 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2474 if (self == peer)
2475 return -1;
2476
2477 *rule_nr = 51;
2478 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2479 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002480 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002481 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2482 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2483 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002484			/* The last P_SYNC_UUID did not get through. Undo the modifications the
	2485			   peer made to its UUIDs when it last started a resync as sync source. */
2486
Philipp Reisner31890f42011-01-19 14:12:51 +01002487 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002488 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002489
2490 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2491 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002492
	2493			dev_info(DEV, "Did not get the last syncUUID packet, corrected:\n");
2494 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2495
Philipp Reisnerb411b362009-09-25 16:07:19 -07002496 return -1;
2497 }
2498 }
2499
2500 *rule_nr = 60;
2501 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2502 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2503 peer = mdev->p_uuid[i] & ~((u64)1);
2504 if (self == peer)
2505 return -2;
2506 }
2507
2508 *rule_nr = 70;
2509 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2510 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2511 if (self == peer)
2512 return 1;
2513
2514 *rule_nr = 71;
2515 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2516 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002517 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002518 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2519 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2520 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002521			/* The last P_SYNC_UUID did not get through. Undo the modifications we
 2522			   made to our own UUIDs at the last start of resync as sync source. */
2523
Philipp Reisner31890f42011-01-19 14:12:51 +01002524 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002525 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526
2527 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2528 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2529
Philipp Reisner4a23f262011-01-11 17:42:17 +01002530 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2532 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2533
2534 return 1;
2535 }
2536 }
2537
2538
2539 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002540 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002541 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2542 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2543 if (self == peer)
2544 return 2;
2545 }
2546
2547 *rule_nr = 90;
2548 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2549 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2550 if (self == peer && self != ((u64)0))
2551 return 100;
2552
2553 *rule_nr = 100;
2554 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2555 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2556 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2557 peer = mdev->p_uuid[j] & ~((u64)1);
2558 if (self == peer)
2559 return -100;
2560 }
2561 }
2562
2563 return -1000;
2564}
2565
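/*
 * Illustrative sketch only, not part of the driver: a plain-C summary of how
 * drbd_sync_handshake() further down interprets the value returned by
 * drbd_uuid_compare() above.  The helper name uuid_compare_meaning() is made
 * up for this example.
 */
static const char *uuid_compare_meaning(int hg)
{
	if (hg == -1000)
		return "unrelated data, drop the connection";
	if (hg < -1000)
		return "needs a newer peer protocol to resolve (protocol -hg - 1000)";
	if (hg == 100 || hg == -100)
		return "split brain, run the after-split-brain policies";
	if (hg == 0)
		return "UUIDs match, connect without resync";
	if (hg >= 2 || hg <= -2)
		return hg > 0 ? "full sync as SyncSource" : "full sync as SyncTarget";
	return hg > 0 ? "bitmap-based sync as SyncSource" : "bitmap-based sync as SyncTarget";
}
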
2566/* drbd_sync_handshake() returns the new conn state on success, or
2567 CONN_MASK (-1) on failure.
2568 */
2569static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2570 enum drbd_disk_state peer_disk) __must_hold(local)
2571{
2572 int hg, rule_nr;
2573 enum drbd_conns rv = C_MASK;
2574 enum drbd_disk_state mydisk;
2575
2576 mydisk = mdev->state.disk;
2577 if (mydisk == D_NEGOTIATING)
2578 mydisk = mdev->new_state_tmp.disk;
2579
2580 dev_info(DEV, "drbd_sync_handshake:\n");
2581 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2582 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2583 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2584
2585 hg = drbd_uuid_compare(mdev, &rule_nr);
2586
2587 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2588
2589 if (hg == -1000) {
2590 dev_alert(DEV, "Unrelated data, aborting!\n");
2591 return C_MASK;
2592 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002593 if (hg < -1000) {
2594 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002595 return C_MASK;
2596 }
2597
2598 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2599 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2600 int f = (hg == -100) || abs(hg) == 2;
2601 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2602 if (f)
2603 hg = hg*2;
2604 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2605 hg > 0 ? "source" : "target");
2606 }
2607
Adam Gandelman3a11a482010-04-08 16:48:23 -07002608 if (abs(hg) == 100)
2609 drbd_khelper(mdev, "initial-split-brain");
2610
Philipp Reisner89e58e72011-01-19 13:12:45 +01002611 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002612 int pcount = (mdev->state.role == R_PRIMARY)
2613 + (peer_role == R_PRIMARY);
2614 int forced = (hg == -100);
2615
2616 switch (pcount) {
2617 case 0:
2618 hg = drbd_asb_recover_0p(mdev);
2619 break;
2620 case 1:
2621 hg = drbd_asb_recover_1p(mdev);
2622 break;
2623 case 2:
2624 hg = drbd_asb_recover_2p(mdev);
2625 break;
2626 }
2627 if (abs(hg) < 100) {
2628 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2629 "automatically solved. Sync from %s node\n",
2630 pcount, (hg < 0) ? "peer" : "this");
2631 if (forced) {
2632 dev_warn(DEV, "Doing a full sync, since"
 2633					     " UUIDs were ambiguous.\n");
2634 hg = hg*2;
2635 }
2636 }
2637 }
2638
2639 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002640 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002641 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002642 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643 hg = 1;
2644
2645 if (abs(hg) < 100)
2646 dev_warn(DEV, "Split-Brain detected, manually solved. "
2647 "Sync from %s node\n",
2648 (hg < 0) ? "peer" : "this");
2649 }
2650
2651 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002652 /* FIXME this log message is not correct if we end up here
2653 * after an attempted attach on a diskless node.
2654 * We just refuse to attach -- well, we drop the "connection"
2655 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002656 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002657 drbd_khelper(mdev, "split-brain");
2658 return C_MASK;
2659 }
2660
2661 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2662 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2663 return C_MASK;
2664 }
2665
2666 if (hg < 0 && /* by intention we do not use mydisk here. */
2667 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002668 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002669 case ASB_CALL_HELPER:
2670 drbd_khelper(mdev, "pri-lost");
2671 /* fall through */
2672 case ASB_DISCONNECT:
2673 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2674 return C_MASK;
2675 case ASB_VIOLENTLY:
2676 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
 2677				 " assumption\n");
2678 }
2679 }
2680
Philipp Reisner89e58e72011-01-19 13:12:45 +01002681 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002682 if (hg == 0)
2683 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2684 else
 2685			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.\n",
2686 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2687 abs(hg) >= 2 ? "full" : "bit-map based");
2688 return C_MASK;
2689 }
2690
Philipp Reisnerb411b362009-09-25 16:07:19 -07002691 if (abs(hg) >= 2) {
2692 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002693 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2694 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002695 return C_MASK;
2696 }
2697
2698 if (hg > 0) { /* become sync source. */
2699 rv = C_WF_BITMAP_S;
2700 } else if (hg < 0) { /* become sync target */
2701 rv = C_WF_BITMAP_T;
2702 } else {
2703 rv = C_CONNECTED;
2704 if (drbd_bm_total_weight(mdev)) {
2705 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2706 drbd_bm_total_weight(mdev));
2707 }
2708 }
2709
2710 return rv;
2711}
2712
2713/* returns 1 if invalid */
2714static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2715{
2716 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2717 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2718 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2719 return 0;
2720
2721 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2722 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2723 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2724 return 1;
2725
2726 /* everything else is valid if they are equal on both sides. */
2727 if (peer == self)
2728 return 0;
2729
 2730	/* everything else is invalid. */
2731 return 1;
2732}
2733
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002734static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2735 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002736{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002737 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002739 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2741
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742 p_proto = be32_to_cpu(p->protocol);
2743 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2744 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2745 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002746 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002747 cf = be32_to_cpu(p->conn_flags);
2748 p_want_lose = cf & CF_WANT_LOSE;
2749
2750 clear_bit(CONN_DRY_RUN, &mdev->flags);
2751
2752 if (cf & CF_DRY_RUN)
2753 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754
Philipp Reisner89e58e72011-01-19 13:12:45 +01002755 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 dev_err(DEV, "incompatible communication protocols\n");
2757 goto disconnect;
2758 }
2759
Philipp Reisner89e58e72011-01-19 13:12:45 +01002760 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002761 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2762 goto disconnect;
2763 }
2764
Philipp Reisner89e58e72011-01-19 13:12:45 +01002765 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2767 goto disconnect;
2768 }
2769
Philipp Reisner89e58e72011-01-19 13:12:45 +01002770 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2772 goto disconnect;
2773 }
2774
Philipp Reisner89e58e72011-01-19 13:12:45 +01002775 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2777 goto disconnect;
2778 }
2779
Philipp Reisner89e58e72011-01-19 13:12:45 +01002780 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002781 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2782 goto disconnect;
2783 }
2784
Philipp Reisner31890f42011-01-19 14:12:51 +01002785 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002786 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787
2788 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002789 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790
2791 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2792 if (strcmp(p_integrity_alg, my_alg)) {
2793 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2794 goto disconnect;
2795 }
2796 dev_info(DEV, "data-integrity-alg: %s\n",
2797 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2798 }
2799
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002800 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002801
2802disconnect:
2803 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002804 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805}
2806
2807/* helper function
2808 * input: alg name, feature name
2809 * return: NULL (alg name was "")
2810 * ERR_PTR(error) if something goes wrong
2811 * or the crypto hash ptr, if it worked out ok. */
2812struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2813 const char *alg, const char *name)
2814{
2815 struct crypto_hash *tfm;
2816
2817 if (!alg[0])
2818 return NULL;
2819
2820 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2821 if (IS_ERR(tfm)) {
2822 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2823 alg, name, PTR_ERR(tfm));
2824 return tfm;
2825 }
2826 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2827 crypto_free_hash(tfm);
2828 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2829 return ERR_PTR(-EINVAL);
2830 }
2831 return tfm;
2832}
2833
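/*
 * Caller-pattern sketch (illustration only; it mirrors the use in
 * receive_SyncParam() below): NULL means the feature is disabled (empty
 * algorithm name), an ERR_PTR() means failure (already logged), anything
 * else is a usable tfm.  The helper name is made up for this example.
 */
static struct crypto_hash *example_alloc_verify_tfm(struct drbd_conf *mdev,
						    const char *alg)
{
	struct crypto_hash *tfm;

	tfm = drbd_crypto_alloc_digest_safe(mdev, alg, "verify-alg");
	if (IS_ERR(tfm))
		return NULL;	/* caller should disconnect; error was logged */
	return tfm;		/* may be NULL: empty alg name means "not used" */
}
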
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002834static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2835 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002837 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002838 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002839 unsigned int header_size, data_size, exp_max_sz;
2840 struct crypto_hash *verify_tfm = NULL;
2841 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002842 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002843 int *rs_plan_s = NULL;
2844 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845
2846 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2847 : apv == 88 ? sizeof(struct p_rs_param)
2848 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002849 : apv <= 94 ? sizeof(struct p_rs_param_89)
2850 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002851
Philipp Reisner02918be2010-08-20 14:35:10 +02002852 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002853 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002854 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002855 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002856 }
2857
2858 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002859 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002860 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002861 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002862 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002863 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002864 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002865 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002866 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002867 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 D_ASSERT(data_size == 0);
2869 }
2870
2871 /* initialize verify_alg and csums_alg */
2872 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2873
Philipp Reisner02918be2010-08-20 14:35:10 +02002874 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002875 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002876
2877 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2878
2879 if (apv >= 88) {
2880 if (apv == 88) {
2881 if (data_size > SHARED_SECRET_MAX) {
2882 dev_err(DEV, "verify-alg too long, "
 2883				"peer wants %u, accepting only %u bytes\n",
2884 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002885 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002886 }
2887
2888 if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002889 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890
2891 /* we expect NUL terminated string */
2892 /* but just in case someone tries to be evil */
2893 D_ASSERT(p->verify_alg[data_size-1] == 0);
2894 p->verify_alg[data_size-1] = 0;
2895
2896 } else /* apv >= 89 */ {
2897 /* we still expect NUL terminated strings */
2898 /* but just in case someone tries to be evil */
2899 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2900 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2901 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2902 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2903 }
2904
2905 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2906 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2907 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2908 mdev->sync_conf.verify_alg, p->verify_alg);
2909 goto disconnect;
2910 }
2911 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2912 p->verify_alg, "verify-alg");
2913 if (IS_ERR(verify_tfm)) {
2914 verify_tfm = NULL;
2915 goto disconnect;
2916 }
2917 }
2918
2919 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2920 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2921 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2922 mdev->sync_conf.csums_alg, p->csums_alg);
2923 goto disconnect;
2924 }
2925 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2926 p->csums_alg, "csums-alg");
2927 if (IS_ERR(csums_tfm)) {
2928 csums_tfm = NULL;
2929 goto disconnect;
2930 }
2931 }
2932
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002933 if (apv > 94) {
2934 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2935 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2936 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2937 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2938 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002939
2940 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2941 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2942 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2943 if (!rs_plan_s) {
 2944					dev_err(DEV, "kzalloc of fifo_buffer failed");
2945 goto disconnect;
2946 }
2947 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002948 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002949
2950 spin_lock(&mdev->peer_seq_lock);
2951 /* lock against drbd_nl_syncer_conf() */
2952 if (verify_tfm) {
2953 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2954 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2955 crypto_free_hash(mdev->verify_tfm);
2956 mdev->verify_tfm = verify_tfm;
2957 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2958 }
2959 if (csums_tfm) {
2960 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2961 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2962 crypto_free_hash(mdev->csums_tfm);
2963 mdev->csums_tfm = csums_tfm;
2964 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2965 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002966 if (fifo_size != mdev->rs_plan_s.size) {
2967 kfree(mdev->rs_plan_s.values);
2968 mdev->rs_plan_s.values = rs_plan_s;
2969 mdev->rs_plan_s.size = fifo_size;
2970 mdev->rs_planed = 0;
2971 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002972 spin_unlock(&mdev->peer_seq_lock);
2973 }
2974
2975 return ok;
2976disconnect:
2977 /* just for completeness: actually not needed,
2978 * as this is not reached if csums_tfm was ok. */
2979 crypto_free_hash(csums_tfm);
2980 /* but free the verify_tfm again, if csums_tfm did not work out */
2981 crypto_free_hash(verify_tfm);
2982 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002983 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984}
2985
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986/* warn if the arguments differ by more than 12.5% */
2987static void warn_if_differ_considerably(struct drbd_conf *mdev,
2988 const char *s, sector_t a, sector_t b)
2989{
2990 sector_t d;
2991 if (a == 0 || b == 0)
2992 return;
2993 d = (a > b) ? (a - b) : (b - a);
2994 if (d > (a>>3) || d > (b>>3))
2995 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
2996 (unsigned long long)a, (unsigned long long)b);
2997}
2998
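/*
 * Worked example (illustration only): with a = 1000 and b = 850 sectors,
 * d = 150 while a >> 3 = 125, so d > (a >> 3) and the warning fires; the
 * check triggers once the values differ by more than one eighth (12.5%)
 * of either argument.
 */
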
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002999static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3000 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003001{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003002 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004 sector_t p_size, p_usize, my_usize;
3005 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003006 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003007
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 p_size = be64_to_cpu(p->d_size);
3009 p_usize = be64_to_cpu(p->u_size);
3010
3011 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3012 dev_err(DEV, "some backing storage is needed\n");
3013 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003014 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003015 }
3016
3017 /* just store the peer's disk size for now.
3018 * we still need to figure out whether we accept that. */
3019 mdev->p_size = p_size;
3020
Philipp Reisnerb411b362009-09-25 16:07:19 -07003021 if (get_ldev(mdev)) {
3022 warn_if_differ_considerably(mdev, "lower level device sizes",
3023 p_size, drbd_get_max_capacity(mdev->ldev));
3024 warn_if_differ_considerably(mdev, "user requested size",
3025 p_usize, mdev->ldev->dc.disk_size);
3026
3027 /* if this is the first connect, or an otherwise expected
3028 * param exchange, choose the minimum */
3029 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3030 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3031 p_usize);
3032
3033 my_usize = mdev->ldev->dc.disk_size;
3034
3035 if (mdev->ldev->dc.disk_size != p_usize) {
3036 mdev->ldev->dc.disk_size = p_usize;
3037 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3038 (unsigned long)mdev->ldev->dc.disk_size);
3039 }
3040
3041 /* Never shrink a device with usable data during connect.
3042 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003043 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 drbd_get_capacity(mdev->this_bdev) &&
3045 mdev->state.disk >= D_OUTDATED &&
3046 mdev->state.conn < C_CONNECTED) {
3047 dev_err(DEV, "The peer's disk size is too small!\n");
3048 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3049 mdev->ldev->dc.disk_size = my_usize;
3050 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003051 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052 }
3053 put_ldev(mdev);
3054 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055
Philipp Reisnere89b5912010-03-24 17:11:33 +01003056 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003057 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003058 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 put_ldev(mdev);
3060 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003061 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 drbd_md_sync(mdev);
3063 } else {
3064 /* I am diskless, need to accept the peer's size. */
3065 drbd_set_my_capacity(mdev, p_size);
3066 }
3067
Philipp Reisner99432fc2011-05-20 16:39:13 +02003068 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3069 drbd_reconsider_max_bio_size(mdev);
3070
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071 if (get_ldev(mdev)) {
3072 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3073 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3074 ldsc = 1;
3075 }
3076
Philipp Reisnerb411b362009-09-25 16:07:19 -07003077 put_ldev(mdev);
3078 }
3079
3080 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3081 if (be64_to_cpu(p->c_size) !=
3082 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3083 /* we have different sizes, probably peer
3084 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003085 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 }
3087 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3088 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3089 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003090 mdev->state.disk >= D_INCONSISTENT) {
3091 if (ddsf & DDSF_NO_RESYNC)
3092 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3093 else
3094 resync_after_online_grow(mdev);
3095 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3097 }
3098 }
3099
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003100 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101}
3102
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003103static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3104 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003106 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003108 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003109
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3111
3112 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3113 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3114
3115 kfree(mdev->p_uuid);
3116 mdev->p_uuid = p_uuid;
3117
3118 if (mdev->state.conn < C_CONNECTED &&
3119 mdev->state.disk < D_INCONSISTENT &&
3120 mdev->state.role == R_PRIMARY &&
3121 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3122 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3123 (unsigned long long)mdev->ed_uuid);
3124 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003125 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126 }
3127
3128 if (get_ldev(mdev)) {
3129 int skip_initial_sync =
3130 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003131 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003132 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3133 (p_uuid[UI_FLAGS] & 8);
3134 if (skip_initial_sync) {
3135 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3136 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003137 "clear_n_write from receive_uuids",
3138 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003139 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3140 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3141 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3142 CS_VERBOSE, NULL);
3143 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003144 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003145 }
3146 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003147 } else if (mdev->state.disk < D_INCONSISTENT &&
3148 mdev->state.role == R_PRIMARY) {
3149 /* I am a diskless primary, the peer just created a new current UUID
3150 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003151 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003152 }
3153
 3154	/* Before we test for the disk state, we should wait until a possibly
3155 ongoing cluster wide state change is finished. That is important if
3156 we are primary and are detaching from our disk. We need to see the
3157 new disk state... */
3158 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3159 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003160 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3161
3162 if (updated_uuids)
3163 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003165 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003166}
3167
3168/**
3169 * convert_state() - Converts the peer's view of the cluster state to our point of view
3170 * @ps: The state as seen by the peer.
3171 */
3172static union drbd_state convert_state(union drbd_state ps)
3173{
3174 union drbd_state ms;
3175
3176 static enum drbd_conns c_tab[] = {
3177 [C_CONNECTED] = C_CONNECTED,
3178
3179 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3180 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3181 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3182 [C_VERIFY_S] = C_VERIFY_T,
3183 [C_MASK] = C_MASK,
3184 };
3185
3186 ms.i = ps.i;
3187
3188 ms.conn = c_tab[ps.conn];
3189 ms.peer = ps.role;
3190 ms.role = ps.peer;
3191 ms.pdsk = ps.disk;
3192 ms.disk = ps.pdsk;
3193 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3194
3195 return ms;
3196}
3197
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003198static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3199 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003201 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003203 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003204
Philipp Reisnerb411b362009-09-25 16:07:19 -07003205 mask.i = be32_to_cpu(p->mask);
3206 val.i = be32_to_cpu(p->val);
3207
Philipp Reisner25703f82011-02-07 14:35:25 +01003208 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3210 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003211 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 }
3213
3214 mask = convert_state(mask);
3215 val = convert_state(val);
3216
3217 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3218
3219 drbd_send_sr_reply(mdev, rv);
3220 drbd_md_sync(mdev);
3221
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003222 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223}
3224
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003225static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3226 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003228 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003229 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003231 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232 int rv;
3233
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234 peer_state.i = be32_to_cpu(p->state);
3235
3236 real_peer_disk = peer_state.disk;
3237 if (peer_state.disk == D_NEGOTIATING) {
3238 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3239 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3240 }
3241
Philipp Reisner87eeee42011-01-19 14:16:30 +01003242 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003244 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003245 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003246
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003247 /* peer says his disk is uptodate, while we think it is inconsistent,
3248 * and this happens while we think we have a sync going on. */
3249 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3250 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3251 /* If we are (becoming) SyncSource, but peer is still in sync
3252 * preparation, ignore its uptodate-ness to avoid flapping, it
3253 * will change to inconsistent once the peer reaches active
3254 * syncing states.
3255 * It may have changed syncer-paused flags, however, so we
3256 * cannot ignore this completely. */
3257 if (peer_state.conn > C_CONNECTED &&
3258 peer_state.conn < C_SYNC_SOURCE)
3259 real_peer_disk = D_INCONSISTENT;
3260
3261 /* if peer_state changes to connected at the same time,
3262 * it explicitly notifies us that it finished resync.
3263 * Maybe we should finish it up, too? */
3264 else if (os.conn >= C_SYNC_SOURCE &&
3265 peer_state.conn == C_CONNECTED) {
3266 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3267 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003268 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003269 }
3270 }
3271
3272 /* peer says his disk is inconsistent, while we think it is uptodate,
3273 * and this happens while the peer still thinks we have a sync going on,
3274 * but we think we are already done with the sync.
3275 * We ignore this to avoid flapping pdsk.
3276 * This should not happen, if the peer is a recent version of drbd. */
3277 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3278 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3279 real_peer_disk = D_UP_TO_DATE;
3280
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003281 if (ns.conn == C_WF_REPORT_PARAMS)
3282 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003283
Philipp Reisner67531712010-10-27 12:21:30 +02003284 if (peer_state.conn == C_AHEAD)
3285 ns.conn = C_BEHIND;
3286
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3288 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3289 int cr; /* consider resync */
3290
3291 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003292 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003293 /* if we had an established connection
3294 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003295 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003297 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003298 /* if we have both been inconsistent, and the peer has been
3299 * forced to be UpToDate with --overwrite-data */
3300 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3301 /* if we had been plain connected, and the admin requested to
3302 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003303 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003304 (peer_state.conn >= C_STARTING_SYNC_S &&
3305 peer_state.conn <= C_WF_BITMAP_T));
3306
3307 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003308 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309
3310 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003311 if (ns.conn == C_MASK) {
3312 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003314 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003315 } else if (peer_state.disk == D_NEGOTIATING) {
3316 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3317 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003318 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003320 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003321 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003322 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003324 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003325 }
3326 }
3327 }
3328
Philipp Reisner87eeee42011-01-19 14:16:30 +01003329 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003330 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003331 goto retry;
3332 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003333 ns.peer = peer_state.role;
3334 ns.pdsk = real_peer_disk;
3335 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003336 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003337 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003338 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3339 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003340 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003341 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003342		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003343 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003344 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3345 tl_clear(mdev);
3346 drbd_uuid_new_current(mdev);
3347 clear_bit(NEW_CUR_UUID, &mdev->flags);
3348 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003349 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003350 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003351 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003352 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003353 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003354
3355 if (rv < SS_SUCCESS) {
3356 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003357 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003358 }
3359
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003360 if (os.conn > C_WF_REPORT_PARAMS) {
3361 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003362 peer_state.disk != D_NEGOTIATING ) {
3363 /* we want resync, peer has not yet decided to sync... */
3364 /* Nowadays only used when forcing a node into primary role and
3365 setting its disk to UpToDate with that */
3366 drbd_send_uuids(mdev);
3367 drbd_send_state(mdev);
3368 }
3369 }
3370
Philipp Reisner89e58e72011-01-19 13:12:45 +01003371 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003372
3373 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3374
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003375 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003376}
3377
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003378static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3379 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003380{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003381 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003382
3383 wait_event(mdev->misc_wait,
3384 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003385 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003386 mdev->state.conn < C_CONNECTED ||
3387 mdev->state.disk < D_NEGOTIATING);
3388
3389 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3390
Philipp Reisnerb411b362009-09-25 16:07:19 -07003391 /* Here the _drbd_uuid_ functions are right, current should
3392 _not_ be rotated into the history */
3393 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3394 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3395 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3396
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003397 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003398 drbd_start_resync(mdev, C_SYNC_TARGET);
3399
3400 put_ldev(mdev);
3401 } else
3402 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3403
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003404 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405}
3406
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003407/**
3408 * receive_bitmap_plain
3409 *
3410 * Return 0 when done, 1 when another iteration is needed, and a negative error
3411 * code upon failure.
3412 */
3413static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003414receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3415 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416{
3417 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3418 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003419 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003420
Philipp Reisner02918be2010-08-20 14:35:10 +02003421 if (want != data_size) {
3422 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003423 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424 }
3425 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003426 return 0;
3427 err = drbd_recv(mdev, buffer, want);
3428 if (err != want) {
3429 if (err >= 0)
3430 err = -EIO;
3431 return err;
3432 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003433
3434 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3435
3436 c->word_offset += num_words;
3437 c->bit_offset = c->word_offset * BITS_PER_LONG;
3438 if (c->bit_offset > c->bm_bits)
3439 c->bit_offset = c->bm_bits;
3440
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003441 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003442}
3443
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003444/**
3445 * recv_bm_rle_bits
3446 *
3447 * Return 0 when done, 1 when another iteration is needed, and a negative error
3448 * code upon failure.
3449 */
3450static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451recv_bm_rle_bits(struct drbd_conf *mdev,
3452 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003453 struct bm_xfer_ctx *c,
3454 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455{
3456 struct bitstream bs;
3457 u64 look_ahead;
3458 u64 rl;
3459 u64 tmp;
3460 unsigned long s = c->bit_offset;
3461 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003462 int toggle = DCBP_get_start(p);
3463 int have;
3464 int bits;
3465
3466 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3467
3468 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3469 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003470 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471
3472 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3473 bits = vli_decode_bits(&rl, look_ahead);
3474 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003475 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003476
3477 if (toggle) {
3478 e = s + rl -1;
3479 if (e >= c->bm_bits) {
3480 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003481 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003482 }
3483 _drbd_bm_set_bits(mdev, s, e);
3484 }
3485
3486 if (have < bits) {
3487 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3488 have, bits, look_ahead,
3489 (unsigned int)(bs.cur.b - p->code),
3490 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003491 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003492 }
3493 look_ahead >>= bits;
3494 have -= bits;
3495
3496 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3497 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003498 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003499 look_ahead |= tmp << have;
3500 have += bits;
3501 }
3502
3503 c->bit_offset = s;
3504 bm_xfer_ctx_bit_to_word_offset(c);
3505
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003506 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003507}
3508
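/*
 * Illustrative sketch (plain userspace C, not driver code): the core of
 * recv_bm_rle_bits() above with the VLI bitstream stripped away.  Run
 * lengths of clear and set bits alternate; "toggle" says whether the first
 * run is a run of set bits, and only the set runs would be written into
 * the bitmap.
 */
#include <stdio.h>

static void rle_decode_sketch(void)
{
	const unsigned long runs[] = { 5, 3, 10, 2 };	/* alternating run lengths */
	unsigned long s = 0;				/* current bit offset */
	int toggle = 0;					/* 0: first run is clear bits */
	unsigned int i;

	for (i = 0; i < sizeof(runs) / sizeof(runs[0]); i++, toggle = !toggle) {
		if (toggle)
			printf("set bits %lu..%lu\n", s, s + runs[i] - 1);
		s += runs[i];
	}
	/* prints "set bits 5..7" and "set bits 18..19" */
}
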
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003509/**
3510 * decode_bitmap_c
3511 *
3512 * Return 0 when done, 1 when another iteration is needed, and a negative error
3513 * code upon failure.
3514 */
3515static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516decode_bitmap_c(struct drbd_conf *mdev,
3517 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003518 struct bm_xfer_ctx *c,
3519 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003520{
3521 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003522 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003523
3524 /* other variants had been implemented for evaluation,
3525 * but have been dropped as this one turned out to be "best"
3526 * during all our tests. */
3527
3528 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3529 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003530 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003531}
3532
3533void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3534 const char *direction, struct bm_xfer_ctx *c)
3535{
3536 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003537 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3539 + c->bm_words * sizeof(long);
3540 unsigned total = c->bytes[0] + c->bytes[1];
3541 unsigned r;
3542
 3543	/* total cannot be zero, but just in case: */
3544 if (total == 0)
3545 return;
3546
3547 /* don't report if not compressed */
3548 if (total >= plain)
3549 return;
3550
3551 /* total < plain. check for overflow, still */
3552 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3553 : (1000 * total / plain);
3554
3555 if (r > 1000)
3556 r = 1000;
3557
3558 r = 1000 - r;
3559 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3560 "total %u; compression: %u.%u%%\n",
3561 direction,
3562 c->bytes[1], c->packets[1],
3563 c->bytes[0], c->packets[0],
3564 total, r/10, r % 10);
3565}
3566
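/*
 * Worked example (illustration only): if the plain transfer would have been
 * plain = 4096 bytes and only total = 512 bytes went over the wire, then
 * r = 1000 - (1000 * 512) / 4096 = 875, which INFO_bm_xfer_stats() prints
 * as "compression: 87.5%".
 */
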
3567/* Since we are processing the bitfield from lower addresses to higher,
 3568   it does not matter whether we process it in 32 bit chunks or 64 bit
 3569   chunks, as long as it is little endian. (Understand it as a byte stream,
 3570   beginning with the lowest byte...) If we used big endian
 3571   we would need to process it from the highest address to the lowest,
3572 in order to be agnostic to the 32 vs 64 bits issue.
3573
3574 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003575static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3576 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003577{
3578 struct bm_xfer_ctx c;
3579 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003580 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003581 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003582 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003584 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3585 /* you are supposed to send additional out-of-sync information
3586 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587
3588 /* maybe we should use some per thread scratch page,
3589 * and allocate that during initial device creation? */
3590 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3591 if (!buffer) {
3592 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3593 goto out;
3594 }
3595
3596 c = (struct bm_xfer_ctx) {
3597 .bm_bits = drbd_bm_bits(mdev),
3598 .bm_words = drbd_bm_words(mdev),
3599 };
3600
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003601 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003602 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003603 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003604 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003605 /* MAYBE: sanity check that we speak proto >= 90,
3606 * and the feature is enabled! */
3607 struct p_compressed_bm *p;
3608
Philipp Reisner02918be2010-08-20 14:35:10 +02003609 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003610 dev_err(DEV, "ReportCBitmap packet too large\n");
3611 goto out;
3612 }
3613 /* use the page buff */
3614 p = buffer;
3615 memcpy(p, h, sizeof(*h));
Philipp Reisner02918be2010-08-20 14:35:10 +02003616 if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003618 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3619 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003620 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003622 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003624 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003625 goto out;
3626 }
3627
Philipp Reisner02918be2010-08-20 14:35:10 +02003628 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003629 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003630
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003631 if (err <= 0) {
3632 if (err < 0)
3633 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003635 }
Philipp Reisner02918be2010-08-20 14:35:10 +02003636 if (!drbd_recv_header(mdev, &cmd, &data_size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003638 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003639
3640 INFO_bm_xfer_stats(mdev, "receive", &c);
3641
3642 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003643 enum drbd_state_rv rv;
3644
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645 ok = !drbd_send_bitmap(mdev);
3646 if (!ok)
3647 goto out;
3648 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003649 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3650 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3652 /* admin may have requested C_DISCONNECTING,
3653 * other threads may have noticed network errors */
3654 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3655 drbd_conn_str(mdev->state.conn));
3656 }
3657
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003658 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003659 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003660 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3662 drbd_start_resync(mdev, C_SYNC_SOURCE);
3663 free_page((unsigned long) buffer);
3664 return ok;
3665}
3666
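/*
 * Userspace sketch (illustration only) of the endianness remark above
 * receive_bitmap(): on a little-endian layout the same byte stream gives the
 * same bit order whether it is walked in 32-bit or 64-bit words, so the
 * receiver does not need to know which word size the sender used.  The demo
 * assumes a little-endian host.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void little_endian_chunking_demo(void)
{
	uint64_t one64 = 0x0123456789abcdefULL;
	uint32_t two32[2] = { 0x89abcdefU, 0x01234567U };	/* same value, low word first */
	uint8_t a[8], b[8];

	memcpy(a, &one64, sizeof(a));
	memcpy(b, two32, sizeof(b));
	printf("byte streams %s\n", memcmp(a, b, 8) == 0 ? "match" : "differ");
	/* prints "match" on a little-endian host */
}
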
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003667static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3668 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669{
3670 /* TODO zero copy sink :) */
3671 static char sink[128];
3672 int size, want, r;
3673
Philipp Reisner02918be2010-08-20 14:35:10 +02003674 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3675 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676
Philipp Reisner02918be2010-08-20 14:35:10 +02003677 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003678 while (size > 0) {
3679 want = min_t(int, size, sizeof(sink));
3680 r = drbd_recv(mdev, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003681 if (!expect(r > 0))
3682 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003683 size -= r;
3684 }
3685 return size == 0;
3686}
3687
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003688static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3689 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691 /* Make sure we've acked all the TCP data associated
3692 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003693 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003695 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696}
3697
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003698static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3699 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003700{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003701 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003702
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003703 switch (mdev->state.conn) {
3704 case C_WF_SYNC_UUID:
3705 case C_WF_BITMAP_T:
3706 case C_BEHIND:
3707 break;
3708 default:
3709 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3710 drbd_conn_str(mdev->state.conn));
3711 }
3712
Philipp Reisner73a01a12010-10-27 14:33:00 +02003713 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3714
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003715 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003716}
3717
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003718typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
3719 unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720
Philipp Reisner02918be2010-08-20 14:35:10 +02003721struct data_cmd {
3722 int expect_payload;
3723 size_t pkt_size;
3724 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003725};
3726
Philipp Reisner02918be2010-08-20 14:35:10 +02003727static struct data_cmd drbd_cmd_handler[] = {
3728 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3729 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3730 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3731 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003732 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3733 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3734 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003735 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3736 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003737 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3738 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003739 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3740 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3741 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3742 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3743 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3744 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3745 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3746 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3747 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3748 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003749 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003750 /* anything missing from this table is in
3751 * the asender_tbl, see get_asender_cmd */
3752 [P_MAX_CMD] = { 0, 0, NULL },
3753};
3754
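/*
 * The table above is what drbdd() dispatches on: the packet type indexes
 * directly into drbd_cmd_handler[], pkt_size gives the fixed part of the
 * packet (generic header plus sub-header), and expect_payload says whether
 * trailing data beyond that fixed part is legal.  A hypothetical new packet
 * type (P_EXAMPLE and receive_example are made-up names, not part of the
 * protocol) would be wired up the same way:
 *
 *	static int receive_example(struct drbd_conf *mdev, enum drbd_packet cmd,
 *				   unsigned int data_size)
 *	{
 *		char buf[64];
 *
 *		if (data_size > sizeof(buf))
 *			return false;
 *		return drbd_recv(mdev, buf, data_size) == (int)data_size;
 *	}
 *
 *	[P_EXAMPLE] = { 1, sizeof(struct p_header), receive_example },
 */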
 3755/* All handler functions that expect a sub-header get that sub-header in
Philipp Reisnere42325a2011-01-19 13:55:45 +01003756 mdev->tconn->data.rbuf.header.head.payload.
Philipp Reisner02918be2010-08-20 14:35:10 +02003757
Philipp Reisnere42325a2011-01-19 13:55:45 +01003758 Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
Philipp Reisner02918be2010-08-20 14:35:10 +02003759 p_header, but it may not rely on that, since there is also p_header95.
3760 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003761
3762static void drbdd(struct drbd_conf *mdev)
3763{
Philipp Reisnerc0129492011-01-19 16:58:16 +01003764 struct p_header *header = &mdev->tconn->data.rbuf.header;
Philipp Reisner02918be2010-08-20 14:35:10 +02003765 unsigned int packet_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003766 enum drbd_packet cmd;
Philipp Reisner02918be2010-08-20 14:35:10 +02003767 size_t shs; /* sub header size */
3768 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003770 while (get_t_state(&mdev->tconn->receiver) == RUNNING) {
Philipp Reisnerbc31fe32011-02-07 11:14:38 +01003771 drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver);
Philipp Reisner02918be2010-08-20 14:35:10 +02003772 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3773 goto err_out;
3774
3775 if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
3776 dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
3777 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003778 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003779
Philipp Reisnerc0129492011-01-19 16:58:16 +01003780 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003781 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3782 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3783 goto err_out;
3784 }
3785
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003786 if (shs) {
Philipp Reisnerc0129492011-01-19 16:58:16 +01003787 rv = drbd_recv(mdev, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003788 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003789 if (!signal_pending(current))
3790 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003791 goto err_out;
3792 }
3793 }
3794
Philipp Reisner02918be2010-08-20 14:35:10 +02003795 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
3796
3797 if (unlikely(!rv)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798 dev_err(DEV, "error receiving %s, l: %d!\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02003799 cmdname(cmd), packet_size);
3800 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003801 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003803
Philipp Reisner02918be2010-08-20 14:35:10 +02003804 if (0) {
3805 err_out:
3806 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003807 }
Lars Ellenberg856c50c2010-10-14 13:37:40 +02003808 /* If we leave here, we probably want to update at least the
3809 * "Connected" indicator on stable storage. Do so explicitly here. */
3810 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811}
3812
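/*
 * Size bookkeeping in drbdd(), spelled out: drbd_recv_header() has already
 * consumed the generic header, so the sub-header still to be read for a
 * given packet type is
 *
 *	shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header);
 *
 * and the remaining packet_size - shs bytes are the payload passed on to
 * the handler.  For table entries with expect_payload == 0 that remainder
 * must be zero, which is exactly what the "No payload expected" check
 * above enforces.
 */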
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003813void drbd_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814{
3815 struct drbd_wq_barrier barr;
3816
3817 barr.w.cb = w_prev_work_done;
3818 init_completion(&barr.done);
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003819 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 wait_for_completion(&barr.done);
3821}
3822
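/*
 * drbd_flush_workqueue() is a plain barrier: w_prev_work_done only
 * completes barr.done, so once wait_for_completion() returns, every work
 * item that was queued on tconn->data.work before the barrier has been
 * processed by the worker.  drbd_disconnect() below relies on exactly that
 * to "cancel" w_e_end_*_req and friends:
 *
 *	drbd_flush_workqueue(mdev->tconn);
 */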
3823static void drbd_disconnect(struct drbd_conf *mdev)
3824{
3825 enum drbd_fencing_p fp;
3826 union drbd_state os, ns;
3827 int rv = SS_UNKNOWN_ERROR;
3828 unsigned int i;
3829
3830 if (mdev->state.conn == C_STANDALONE)
3831 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832
3833 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003834 drbd_thread_stop(&mdev->tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835 drbd_free_sock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836
Philipp Reisner85719572010-07-21 10:20:17 +02003837 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003838 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3840 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3841 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003842 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003843
3844 /* We do not have data structures that would allow us to
3845 * get the rs_pending_cnt down to 0 again.
3846 * * On C_SYNC_TARGET we do not have any data structures describing
3847 * the pending RSDataRequest's we have sent.
3848 * * On C_SYNC_SOURCE there is no data structure that tracks
3849 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3850 * And no, it is not the sum of the reference counts in the
3851 * resync_LRU. The resync_LRU tracks the whole operation including
3852 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3853 * on the fly. */
3854 drbd_rs_cancel_all(mdev);
3855 mdev->rs_total = 0;
3856 mdev->rs_failed = 0;
3857 atomic_set(&mdev->rs_pending_cnt, 0);
3858 wake_up(&mdev->misc_wait);
3859
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003860 del_timer(&mdev->request_timer);
3861
Philipp Reisnerb411b362009-09-25 16:07:19 -07003862 /* make sure syncer is stopped and w_resume_next_sg queued */
3863 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003864 resync_timer_fn((unsigned long)mdev);
3865
Philipp Reisnerb411b362009-09-25 16:07:19 -07003866 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3867 * w_make_resync_request etc. which may still be on the worker queue
3868 * to be "canceled" */
Philipp Reisner191d3cc2011-01-19 14:53:22 +01003869 drbd_flush_workqueue(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003870
3871 /* This also does reclaim_net_ee(). If we do this too early, we might
3872 * miss some resync ee and pages.*/
3873 drbd_process_done_ee(mdev);
3874
3875 kfree(mdev->p_uuid);
3876 mdev->p_uuid = NULL;
3877
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003878 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003879 tl_clear(mdev);
3880
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881 dev_info(DEV, "Connection closed\n");
3882
3883 drbd_md_sync(mdev);
3884
3885 fp = FP_DONT_CARE;
3886 if (get_ldev(mdev)) {
3887 fp = mdev->ldev->dc.fencing;
3888 put_ldev(mdev);
3889 }
3890
Philipp Reisner87f7be42010-06-11 13:56:33 +02003891 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3892 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893
Philipp Reisner87eeee42011-01-19 14:16:30 +01003894 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003895 os = mdev->state;
3896 if (os.conn >= C_UNCONNECTED) {
3897 /* Do not restart in case we are C_DISCONNECTING */
3898 ns = os;
3899 ns.conn = C_UNCONNECTED;
3900 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3901 }
Philipp Reisner87eeee42011-01-19 14:16:30 +01003902 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903
3904 if (os.conn == C_DISCONNECTING) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01003905 wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906
Philipp Reisnera0638452011-01-19 14:31:32 +01003907 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
3908 mdev->tconn->cram_hmac_tfm = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909
Philipp Reisner89e58e72011-01-19 13:12:45 +01003910 kfree(mdev->tconn->net_conf);
3911 mdev->tconn->net_conf = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003912 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3913 }
3914
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003915 /* serialize with bitmap writeout triggered by the state change,
3916 * if any. */
3917 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3918
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 /* tcp_close and release of sendpage pages can be deferred. I don't
3920 * want to use SO_LINGER, because apparently it can be deferred for
3921 * more than 20 seconds (longest time I checked).
3922 *
3923 * Actually we don't care for exactly when the network stack does its
3924 * put_page(), but release our reference on these pages right here.
3925 */
3926 i = drbd_release_ee(mdev, &mdev->net_ee);
3927 if (i)
3928 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003929 i = atomic_read(&mdev->pp_in_use_by_net);
3930 if (i)
3931 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 i = atomic_read(&mdev->pp_in_use);
3933 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003934 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935
3936 D_ASSERT(list_empty(&mdev->read_ee));
3937 D_ASSERT(list_empty(&mdev->active_ee));
3938 D_ASSERT(list_empty(&mdev->sync_ee));
3939 D_ASSERT(list_empty(&mdev->done_ee));
3940
3941 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3942 atomic_set(&mdev->current_epoch->epoch_size, 0);
3943 D_ASSERT(list_empty(&mdev->current_epoch->list));
3944}
3945
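/*
 * Teardown order in drbd_disconnect() above, summarized: stop the asender
 * and free the sockets, wait for the active/sync/read ee lists to drain,
 * cancel the resync bookkeeping, flush the worker queue, process the
 * remaining done_ee, clear the transfer log (unless suspended), possibly
 * fence the peer, and only then move the connection state back to
 * C_UNCONNECTED (or all the way to C_STANDALONE when disconnecting).
 */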
3946/*
3947 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3948 * we can agree on is stored in agreed_pro_version.
3949 *
3950 * feature flags and the reserved array should be enough room for future
3951 * enhancements of the handshake protocol, and possible plugins...
3952 *
3953 * for now, they are expected to be zero, but ignored.
3954 */
3955static int drbd_send_handshake(struct drbd_conf *mdev)
3956{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003957 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003958 struct p_handshake *p = &mdev->tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003959 int ok;
3960
Philipp Reisnere42325a2011-01-19 13:55:45 +01003961 if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003962 dev_err(DEV, "interrupted during initial handshake\n");
3963 return 0; /* interrupted. not ok. */
3964 }
3965
Philipp Reisnere42325a2011-01-19 13:55:45 +01003966 if (mdev->tconn->data.socket == NULL) {
3967 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968 return 0;
3969 }
3970
3971 memset(p, 0, sizeof(*p));
3972 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3973 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisnerc0129492011-01-19 16:58:16 +01003974 ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE,
 3975 &p->head, sizeof(*p), 0);
Philipp Reisnere42325a2011-01-19 13:55:45 +01003976 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977 return ok;
3978}
3979
3980/*
3981 * return values:
3982 * 1 yes, we have a valid connection
3983 * 0 oops, did not work out, please try again
3984 * -1 peer talks different language,
3985 * no point in trying again, please go standalone.
3986 */
3987static int drbd_do_handshake(struct drbd_conf *mdev)
3988{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003989 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003990 struct p_handshake *p = &mdev->tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02003991 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
3992 unsigned int length;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003993 enum drbd_packet cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003994 int rv;
3995
3996 rv = drbd_send_handshake(mdev);
3997 if (!rv)
3998 return 0;
3999
Philipp Reisner02918be2010-08-20 14:35:10 +02004000 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004001 if (!rv)
4002 return 0;
4003
Philipp Reisner02918be2010-08-20 14:35:10 +02004004 if (cmd != P_HAND_SHAKE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004006 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007 return -1;
4008 }
4009
Philipp Reisner02918be2010-08-20 14:35:10 +02004010 if (length != expect) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011 dev_err(DEV, "expected HandShake length: %u, received: %u\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004012 expect, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 return -1;
4014 }
4015
4016 rv = drbd_recv(mdev, &p->head.payload, expect);
4017
4018 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004019 if (!signal_pending(current))
4020 dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021 return 0;
4022 }
4023
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 p->protocol_min = be32_to_cpu(p->protocol_min);
4025 p->protocol_max = be32_to_cpu(p->protocol_max);
4026 if (p->protocol_max == 0)
4027 p->protocol_max = p->protocol_min;
4028
4029 if (PRO_VERSION_MAX < p->protocol_min ||
4030 PRO_VERSION_MIN > p->protocol_max)
4031 goto incompat;
4032
Philipp Reisner31890f42011-01-19 14:12:51 +01004033 mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034
4035 dev_info(DEV, "Handshake successful: "
Philipp Reisner31890f42011-01-19 14:12:51 +01004036 "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037
4038 return 1;
4039
4040 incompat:
4041 dev_err(DEV, "incompatible DRBD dialects: "
4042 "I support %d-%d, peer supports %d-%d\n",
4043 PRO_VERSION_MIN, PRO_VERSION_MAX,
4044 p->protocol_min, p->protocol_max);
4045 return -1;
4046}
4047
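/*
 * Version negotiation, worked through with made-up numbers: if this side
 * supported protocol versions 90..95 and the peer announced 93..99, the
 * ranges overlap (95 >= 93 and 90 <= 99), so the handshake succeeds with
 *
 *	agreed_pro_version = min(95, 99) = 95;
 *
 * Had the peer announced 96..99 instead, PRO_VERSION_MAX < protocol_min
 * would hold and both sides would give up with the "incompatible DRBD
 * dialects" message.  The real PRO_VERSION_MIN/MAX values are defined
 * elsewhere; the numbers here are only for illustration.
 */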
4048#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
4049static int drbd_do_auth(struct drbd_conf *mdev)
4050{
 4051 dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4052 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004053 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054}
4055#else
4056#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004057
4058/* Return value:
4059 1 - auth succeeded,
4060 0 - failed, try again (network error),
4061 -1 - auth failed, don't try again.
4062*/
4063
Philipp Reisnerb411b362009-09-25 16:07:19 -07004064static int drbd_do_auth(struct drbd_conf *mdev)
4065{
4066 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4067 struct scatterlist sg;
4068 char *response = NULL;
4069 char *right_response = NULL;
4070 char *peers_ch = NULL;
Philipp Reisner89e58e72011-01-19 13:12:45 +01004071 unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004072 unsigned int resp_size;
4073 struct hash_desc desc;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004074 enum drbd_packet cmd;
Philipp Reisner02918be2010-08-20 14:35:10 +02004075 unsigned int length;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004076 int rv;
4077
Philipp Reisnera0638452011-01-19 14:31:32 +01004078 desc.tfm = mdev->tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004079 desc.flags = 0;
4080
Philipp Reisnera0638452011-01-19 14:31:32 +01004081 rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm,
Philipp Reisner89e58e72011-01-19 13:12:45 +01004082 (u8 *)mdev->tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004083 if (rv) {
4084 dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004085 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 goto fail;
4087 }
4088
4089 get_random_bytes(my_challenge, CHALLENGE_LEN);
4090
4091 rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
4092 if (!rv)
4093 goto fail;
4094
Philipp Reisner02918be2010-08-20 14:35:10 +02004095 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096 if (!rv)
4097 goto fail;
4098
Philipp Reisner02918be2010-08-20 14:35:10 +02004099 if (cmd != P_AUTH_CHALLENGE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004101 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004102 rv = 0;
4103 goto fail;
4104 }
4105
Philipp Reisner02918be2010-08-20 14:35:10 +02004106 if (length > CHALLENGE_LEN * 2) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107 dev_err(DEV, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004108 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004109 goto fail;
4110 }
4111
Philipp Reisner02918be2010-08-20 14:35:10 +02004112 peers_ch = kmalloc(length, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113 if (peers_ch == NULL) {
4114 dev_err(DEV, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004115 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116 goto fail;
4117 }
4118
Philipp Reisner02918be2010-08-20 14:35:10 +02004119 rv = drbd_recv(mdev, peers_ch, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120
Philipp Reisner02918be2010-08-20 14:35:10 +02004121 if (rv != length) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004122 if (!signal_pending(current))
4123 dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124 rv = 0;
4125 goto fail;
4126 }
4127
Philipp Reisnera0638452011-01-19 14:31:32 +01004128 resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 response = kmalloc(resp_size, GFP_NOIO);
4130 if (response == NULL) {
4131 dev_err(DEV, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004132 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133 goto fail;
4134 }
4135
4136 sg_init_table(&sg, 1);
Philipp Reisner02918be2010-08-20 14:35:10 +02004137 sg_set_buf(&sg, peers_ch, length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004138
4139 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4140 if (rv) {
4141 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004142 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143 goto fail;
4144 }
4145
4146 rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
4147 if (!rv)
4148 goto fail;
4149
Philipp Reisner02918be2010-08-20 14:35:10 +02004150 rv = drbd_recv_header(mdev, &cmd, &length);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004151 if (!rv)
4152 goto fail;
4153
Philipp Reisner02918be2010-08-20 14:35:10 +02004154 if (cmd != P_AUTH_RESPONSE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004155 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02004156 cmdname(cmd), cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004157 rv = 0;
4158 goto fail;
4159 }
4160
Philipp Reisner02918be2010-08-20 14:35:10 +02004161 if (length != resp_size) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162 dev_err(DEV, "expected AuthResponse payload of wrong size\n");
4163 rv = 0;
4164 goto fail;
4165 }
4166
 4167 rv = drbd_recv(mdev, response, resp_size);
4168
4169 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004170 if (!signal_pending(current))
4171 dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172 rv = 0;
4173 goto fail;
4174 }
4175
4176 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004177 if (right_response == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004178 dev_err(DEV, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004179 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180 goto fail;
4181 }
4182
4183 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4184
4185 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4186 if (rv) {
4187 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004188 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189 goto fail;
4190 }
4191
4192 rv = !memcmp(response, right_response, resp_size);
4193
4194 if (rv)
4195 dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
Philipp Reisner89e58e72011-01-19 13:12:45 +01004196 resp_size, mdev->tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004197 else
4198 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004199
4200 fail:
4201 kfree(peers_ch);
4202 kfree(response);
4203 kfree(right_response);
4204
4205 return rv;
4206}
4207#endif
4208
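/*
 * The challenge-response exchange in drbd_do_auth() above, summarized
 * (both peers run the same sequence, so authentication is mutual):
 *
 *	1. send P_AUTH_CHALLENGE with CHALLENGE_LEN random bytes
 *	2. receive the peer's challenge
 *	3. send P_AUTH_RESPONSE = HMAC(shared_secret, peer's challenge)
 *	4. receive the peer's response and compare it against
 *	   HMAC(shared_secret, our own challenge)
 *
 * A mismatch returns -1 (give up, go standalone), network trouble returns
 * 0 (retry), success returns 1 -- the same convention drbd_do_handshake()
 * uses.
 */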
4209int drbdd_init(struct drbd_thread *thi)
4210{
4211 struct drbd_conf *mdev = thi->mdev;
4212 unsigned int minor = mdev_to_minor(mdev);
4213 int h;
4214
4215 sprintf(current->comm, "drbd%d_receiver", minor);
4216
4217 dev_info(DEV, "receiver (re)started\n");
4218
4219 do {
4220 h = drbd_connect(mdev);
4221 if (h == 0) {
4222 drbd_disconnect(mdev);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004223 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224 }
4225 if (h == -1) {
4226 dev_warn(DEV, "Discarding network configuration.\n");
4227 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4228 }
4229 } while (h == 0);
4230
4231 if (h > 0) {
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004232 if (get_net_conf(mdev->tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233 drbdd(mdev);
Philipp Reisnerb2fb6dbe2011-01-19 13:48:44 +01004234 put_net_conf(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235 }
4236 }
4237
4238 drbd_disconnect(mdev);
4239
4240 dev_info(DEV, "receiver terminated\n");
4241 return 0;
4242}
4243
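/*
 * Return codes of drbd_connect() as handled above: 0 means the attempt
 * failed but should be retried after a short sleep, -1 means the peer is
 * fundamentally incompatible so the network configuration is discarded,
 * and a positive value means a connection is up and drbdd() takes over
 * until it is lost again.
 */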
4244/* ********* acknowledge sender ******** */
4245
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004246static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004248 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249
4250 int retcode = be32_to_cpu(p->retcode);
4251
4252 if (retcode >= SS_SUCCESS) {
4253 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4254 } else {
4255 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4256 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4257 drbd_set_st_err_str(retcode), retcode);
4258 }
4259 wake_up(&mdev->state_wait);
4260
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004261 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262}
4263
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004264static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004265{
4266 return drbd_send_ping_ack(mdev);
4267
4268}
4269
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004270static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004271{
4272 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004273 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004274 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4275 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004277 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278}
4279
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004280static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004281{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004282 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 sector_t sector = be64_to_cpu(p->sector);
4284 int blksize = be32_to_cpu(p->blksize);
4285
Philipp Reisner31890f42011-01-19 14:12:51 +01004286 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004287
4288 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4289
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004290 if (get_ldev(mdev)) {
4291 drbd_rs_complete_io(mdev, sector);
4292 drbd_set_in_sync(mdev, sector, blksize);
4293 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4294 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4295 put_ldev(mdev);
4296 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004298 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004299
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004300 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301}
4302
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004303static int
4304validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4305 struct rb_root *root, const char *func,
4306 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004307{
4308 struct drbd_request *req;
4309 struct bio_and_error m;
4310
Philipp Reisner87eeee42011-01-19 14:16:30 +01004311 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004312 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004314 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004315 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316 }
4317 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004318 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319
4320 if (m.bio)
4321 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004322 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323}
4324
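/*
 * Note on validate_req_change_req_state(): both the lookup by
 * (block_id, sector) and the __req_mod() transition happen under
 * tconn->req_lock; completing the master bio, if the transition finished
 * it, is done after the lock has been dropped.
 */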
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004325static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004326{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004327 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328 sector_t sector = be64_to_cpu(p->sector);
4329 int blksize = be32_to_cpu(p->blksize);
4330 enum drbd_req_event what;
4331
4332 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4333
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004334 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 drbd_set_in_sync(mdev, sector, blksize);
4336 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004337 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004339 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004341 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004342 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343 break;
4344 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004345 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004346 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 break;
4348 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004349 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004350 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 break;
4352 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004353 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004354 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355 break;
4356 default:
4357 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004358 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359 }
4360
4361 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004362 &mdev->write_requests, __func__,
4363 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364}
4365
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004366static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004368 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004369 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004370 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004371 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4372 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004373 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374
4375 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4376
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004377 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378 dec_rs_pending(mdev);
4379 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004380 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004381 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004382
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004383 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004384 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004385 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004386 if (!found) {
4387 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4388 The master bio might already be completed, therefore the
4389 request is no longer in the collision hash. */
4390 /* In Protocol B we might already have got a P_RECV_ACK
4391 but then get a P_NEG_ACK afterwards. */
4392 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004393 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004394 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004395 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004396 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397}
4398
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004399static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004400{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004401 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402 sector_t sector = be64_to_cpu(p->sector);
4403
4404 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4405 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4406 (unsigned long long)sector, be32_to_cpu(p->blksize));
4407
4408 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004409 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004410 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004411}
4412
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004413static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004414{
4415 sector_t sector;
4416 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004417 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418
4419 sector = be64_to_cpu(p->sector);
4420 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421
4422 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4423
4424 dec_rs_pending(mdev);
4425
4426 if (get_ldev_if_state(mdev, D_FAILED)) {
4427 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004428 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004429 case P_NEG_RS_DREPLY:
4430 drbd_rs_failed_io(mdev, sector, size);
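			/* fall through */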
4431 case P_RS_CANCEL:
4432 break;
4433 default:
4434 D_ASSERT(0);
4435 put_ldev(mdev);
4436 return false;
4437 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004438 put_ldev(mdev);
4439 }
4440
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004441 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442}
4443
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004444static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004445{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004446 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447
4448 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4449
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004450 if (mdev->state.conn == C_AHEAD &&
4451 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004452 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4453 mdev->start_resync_timer.expires = jiffies + HZ;
4454 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004455 }
4456
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004457 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004458}
4459
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004460static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004461{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004462 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004463 struct drbd_work *w;
4464 sector_t sector;
4465 int size;
4466
4467 sector = be64_to_cpu(p->sector);
4468 size = be32_to_cpu(p->blksize);
4469
4470 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4471
4472 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4473 drbd_ov_oos_found(mdev, sector, size);
4474 else
4475 ov_oos_print(mdev);
4476
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004477 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004478 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004479
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480 drbd_rs_complete_io(mdev, sector);
4481 dec_rs_pending(mdev);
4482
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004483 --mdev->ov_left;
4484
4485 /* let's advance progress step marks only for every other megabyte */
4486 if ((mdev->ov_left & 0x200) == 0x200)
4487 drbd_advance_rs_marks(mdev, mdev->ov_left);
4488
4489 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004490 w = kmalloc(sizeof(*w), GFP_NOIO);
4491 if (w) {
4492 w->cb = w_ov_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004493 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494 } else {
 4495 dev_err(DEV, "kmalloc(w) failed.\n");
4496 ov_oos_print(mdev);
4497 drbd_resync_finished(mdev);
4498 }
4499 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004500 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004501 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502}
4503
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004504static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004505{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004506 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004507}
4508
Philipp Reisnerb411b362009-09-25 16:07:19 -07004509struct asender_cmd {
4510 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004511 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512};
4513
4514static struct asender_cmd *get_asender_cmd(int cmd)
4515{
4516 static struct asender_cmd asender_tbl[] = {
4517 /* anything missing from this table is in
4518 * the drbd_cmd_handler (drbd_default_handler) table,
4519 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004520 [P_PING] = { sizeof(struct p_header), got_Ping },
4521 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004522 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4523 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4524 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4525 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4526 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4527 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4528 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4529 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4530 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4531 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4532 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004533 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004534 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004535 [P_MAX_CMD] = { 0, NULL },
4536 };
4537 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4538 return NULL;
4539 return &asender_tbl[cmd];
4540}
4541
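/*
 * Unlike drbd_cmd_handler[], where the generic header has already been
 * read before dispatch, the sizes in asender_tbl[] cover the complete
 * packet including the header: drbd_asender() below first reads
 * sizeof(struct p_header), decodes the command, then keeps reading until
 * 'received' matches the table's pkt_size before calling ->process().
 */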
4542int drbd_asender(struct drbd_thread *thi)
4543{
4544 struct drbd_conf *mdev = thi->mdev;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004545 struct p_header *h = &mdev->tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004546 struct asender_cmd *cmd = NULL;
4547
Philipp Reisner257d0af2011-01-26 12:15:29 +01004548 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549 void *buf = h;
4550 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004551 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004552 int ping_timeout_active = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004553 int empty, pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004554 enum drbd_packet cmd_nr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555
4556 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4557
4558 current->policy = SCHED_RR; /* Make this a realtime task! */
4559 current->rt_priority = 2; /* more important than all other tasks */
4560
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004561 while (get_t_state(thi) == RUNNING) {
Philipp Reisnerbc31fe32011-02-07 11:14:38 +01004562 drbd_thread_current_set_cpu(mdev, thi);
Philipp Reisnere43ef192011-02-07 14:40:40 +01004563 if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) {
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004564 if (!drbd_send_ping(mdev)) {
4565 dev_err(DEV, "drbd_send_ping has failed\n");
4566 goto reconnect;
4567 }
Philipp Reisnere42325a2011-01-19 13:55:45 +01004568 mdev->tconn->meta.socket->sk->sk_rcvtimeo =
Philipp Reisner89e58e72011-01-19 13:12:45 +01004569 mdev->tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004570 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004571 }
4572
4573 /* conditionally cork;
4574 * it may hurt latency if we cork without much to send */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004575 if (!mdev->tconn->net_conf->no_cork &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004576 3 < atomic_read(&mdev->unacked_cnt))
Philipp Reisnere42325a2011-01-19 13:55:45 +01004577 drbd_tcp_cork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578 while (1) {
Philipp Reisner808e37b2011-02-07 14:44:14 +01004579 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580 flush_signals(current);
Lars Ellenberg0f8488e2010-10-13 18:19:23 +02004581 if (!drbd_process_done_ee(mdev))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583 /* to avoid race with newly queued ACKs */
Philipp Reisner808e37b2011-02-07 14:44:14 +01004584 set_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004585 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004586 empty = list_empty(&mdev->done_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004587 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004588 /* new ack may have been queued right here,
4589 * but then there is also a signal pending,
4590 * and we start over... */
4591 if (empty)
4592 break;
4593 }
4594 /* but unconditionally uncork unless disabled */
Philipp Reisner89e58e72011-01-19 13:12:45 +01004595 if (!mdev->tconn->net_conf->no_cork)
Philipp Reisnere42325a2011-01-19 13:55:45 +01004596 drbd_tcp_uncork(mdev->tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597
4598 /* short circuit, recv_msg would return EINTR anyways. */
4599 if (signal_pending(current))
4600 continue;
4601
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +01004602 rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0);
Philipp Reisner808e37b2011-02-07 14:44:14 +01004603 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604
4605 flush_signals(current);
4606
4607 /* Note:
4608 * -EINTR (on meta) we got a signal
4609 * -EAGAIN (on meta) rcvtimeo expired
4610 * -ECONNRESET other side closed the connection
4611 * -ERESTARTSYS (on data) we got a signal
4612 * rv < 0 other than above: unexpected error!
4613 * rv == expected: full header or command
4614 * rv < expected: "woken" by signal during receive
4615 * rv == 0 : "connection shut down by peer"
4616 */
4617 if (likely(rv > 0)) {
4618 received += rv;
4619 buf += rv;
4620 } else if (rv == 0) {
4621 dev_err(DEV, "meta connection shut down by peer.\n");
4622 goto reconnect;
4623 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004624 /* If the data socket received something meanwhile,
4625 * that is good enough: peer is still alive. */
Philipp Reisner31890f42011-01-19 14:12:51 +01004626 if (time_after(mdev->tconn->last_received,
Philipp Reisnere42325a2011-01-19 13:55:45 +01004627 jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004628 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004629 if (ping_timeout_active) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630 dev_err(DEV, "PingAck did not arrive in time.\n");
4631 goto reconnect;
4632 }
Philipp Reisnere43ef192011-02-07 14:40:40 +01004633 set_bit(SEND_PING, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634 continue;
4635 } else if (rv == -EINTR) {
4636 continue;
4637 } else {
4638 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
4639 goto reconnect;
4640 }
4641
4642 if (received == expect && cmd == NULL) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004643 if (!decode_header(mdev, h, &cmd_nr, &pkt_size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004645 cmd = get_asender_cmd(cmd_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004646 if (unlikely(cmd == NULL)) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01004647 dev_err(DEV, "unknown command %d on meta (l: %d)\n",
4648 cmd_nr, pkt_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649 goto disconnect;
4650 }
4651 expect = cmd->pkt_size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004652 if (pkt_size != expect - sizeof(struct p_header)) {
4653 dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n",
4654 cmd_nr, pkt_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004655 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004656 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657 }
4658 if (received == expect) {
Philipp Reisner31890f42011-01-19 14:12:51 +01004659 mdev->tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660 D_ASSERT(cmd != NULL);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004661 if (!cmd->process(mdev, cmd_nr))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 goto reconnect;
4663
Lars Ellenbergf36af182011-03-09 22:44:55 +01004664 /* the idle_timeout (ping-int)
4665 * has been restored in got_PingAck() */
4666 if (cmd == get_asender_cmd(P_PING_ACK))
4667 ping_timeout_active = 0;
4668
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669 buf = h;
4670 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004671 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004672 cmd = NULL;
4673 }
4674 }
4675
4676 if (0) {
4677reconnect:
4678 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004679 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004680 }
4681 if (0) {
4682disconnect:
4683 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Lars Ellenberg856c50c2010-10-14 13:37:40 +02004684 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685 }
Philipp Reisner808e37b2011-02-07 14:44:14 +01004686 clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687
4688 D_ASSERT(mdev->state.conn < C_CONNECTED);
4689 dev_info(DEV, "asender terminated\n");
4690
4691 return 0;
4692}
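/*
 * The loop above accumulates bytes into one packet buffer across short
 * reads: 'expect' starts at the plain header size and is raised to the
 * full command size once the header is decoded, while 'received' and
 * 'buf' track progress.  Stripped to its core (read_fn standing in for
 * drbd_recv_short, pkt standing in for the packet buffer, error handling
 * elided), the pattern is:
 *
 *	int received = 0, expect = sizeof(struct p_header);
 *	char *buf = pkt;
 *
 *	while (received < expect) {
 *		int rv = read_fn(buf, expect - received);
 *		if (rv <= 0)
 *			break;
 *		received += rv;
 *		buf += rv;
 *	}
 */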