blob: 02fa1b25dce58aa33094f9492ae787afb2bc9757 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Metadata decoded from a received packet header:
 * the command, the payload size, and the volume number (vnr). */
struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};

/* Outcome when an epoch may be finished (see drbd_may_finish_epoch()). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010065static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070066
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Philipp Reisner00d56942011-02-09 18:09:48 +010068static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
Philipp Reisnerb411b362009-09-25 16:07:19 -070070
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 *
 * On success the returned sub-chain is NUL-terminated (last page's
 * ->private set to 0) and *head is advanced past the removed pages. */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
/* Try to hand out @number pages: first from the global drbd_pp_pool,
 * otherwise freshly allocated with GFP_TRY.  Returns a page chain on
 * success, NULL if not all @number pages could be obtained right now
 * (partial allocations are given back to the pool). */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
196
/* Move finished peer requests off mdev->net_ee onto @to_be_freed.
 * Caller must hold the lock protecting net_ee (see callers, which take
 * mdev->tconn->req_lock). */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
214
/* Collect the finished entries from net_ee under req_lock, then free
 * them outside the lock. */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}
227
/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private, or NULL if @retry is
 * false (or a signal interrupted the wait) and the pages could not be
 * obtained.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* reclaim pages from completed net_ee entries before
		 * re-checking the pool */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages only on success */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
277
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system.
 * @is_net selects which in-use counter to decrement
 * (pp_in_use_by_net vs. pp_in_use). */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* keep the pool bounded: give pages back to the system once the
	 * pool already holds enough for all minors */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
/* Allocate a peer request ("EE") plus enough pool pages for @data_size
 * bytes.  May block for pages iff gfp_mask allows waiting.  Returns NULL
 * on allocation failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault injection hook for testing */
	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* only retry the page allocation if the caller may wait */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
364
/* Release a peer request: its digest (if any), its pool pages
 * (accounted via @is_net, see drbd_pp_free()), and the request itself.
 * Must not have pending bios or still be in an interval tree. */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
375
/* Detach all peer requests from @list under req_lock and free them
 * outside the lock.  Returns the number of freed entries. */
int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries use the pp_in_use_by_net accounting */
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}
393
394
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 *
 * Returns nonzero while the connection state allows further processing
 * and all callbacks succeeded.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}
430
/* Wait until @head is empty.  Caller must hold req_lock; the lock is
 * dropped around io_schedule() and reacquired before re-checking. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
446void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
447{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100448 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700449 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100450 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700451}
452
/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly.
 * On failure *what names the failing step and *newsock is left NULL. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops  = sock->ops;

out:
	return err;
}
483
/* Single sock_recvmsg() call with a kernel buffer; defaults to
 * MSG_WAITALL | MSG_NOSIGNAL when @flags is 0.  Returns the byte count
 * or a negative error. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* temporarily allow kernel-space buffers for sock_recvmsg */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
505
/* Receive exactly @size bytes from the connection's data socket.
 * On any short read / error, forces the connection to C_BROKEN_PIPE.
 * Returns the number of bytes received (== @size on success) or a
 * negative error. */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else	{
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE));

	return rv;
}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/* Actively try to connect to the peer configured in tconn->net_conf.
 * Returns the connected socket, or NULL on failure.  "Soft" failures
 * (timeout, refusal, unreachable) do NOT force C_DISCONNECTING, so the
 * caller can keep retrying; other errors do. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo =  tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(tconn);
	return sock;
}
655
/* Passive side of connection establishment: bind, listen and accept one
 * incoming connection on the configured local address.  Returns the
 * established socket or NULL.  The listen timeout gets ~28.5% random
 * jitter so both peers don't retry in lock-step. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
704
/* Send a "first packet" (bare header, no payload) with command @cmd on
 * @sock, using the pre-allocated data-socket send buffer header. */
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}
711
/* Receive a "first packet" header and return its command, or 0xffff if
 * the read was short or the magic did not match. */
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
/* Per-volume post-connect handshake: reset sequence counters and send
 * sync param / sizes / uuids / state to the peer.  Callback shape
 * (int vnr, void *p, void *data) matches the per-volume iterators used
 * elsewhere in this file.  Returns 0 on success, nonzero on failure. */
static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);

	return !ok;
}
765
/*
 * Establish both connections (data and meta socket) to the peer.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *	-1 peer talks different language,
 *	   no point in trying again, please go standalone.
 *	-2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	/* Two sockets are established: 'sock' carries data packets,
	 * 'msock' carries meta/ack packets (see assignments below). */
	sock  = NULL;
	msock = NULL;

	do {
		/* Actively try to connect to the peer a few times ... */
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* First established socket becomes the data socket,
			 * the second becomes the meta socket. */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* Give the peer a moment, then re-check both sockets
			 * are still alive before leaving the loop. */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* ... and also accept connections initiated by the peer. */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			/* drop our own sockets if they died meanwhile */
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					/* both sides connected simultaneously;
					 * the incoming one wins */
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* NOTE(review): coin flip presumably breaks the
				 * symmetry so both nodes don't retry in lockstep */
				if (random32() & 1)
					goto retry;
			}
		}

		/* bail out if we were asked to disconnect meanwhile */
		if (tconn->volume0->state.conn <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	/* negotiate the protocol version; <= 0 means failure/incompatible */
	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
		return 0;

	/* handshake done: switch the data socket to its normal timeouts */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	/* send initial state for every volume; 1 on success, 0 on failure */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
935
Philipp Reisnerce243852011-02-07 17:27:47 +0100936static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700937{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100938 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100939 pi->cmd = be16_to_cpu(h->h80.command);
940 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100941 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100942 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100943 pi->cmd = be16_to_cpu(h->h95.command);
944 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
945 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200946 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100947 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200948 be32_to_cpu(h->h80.magic),
949 be16_to_cpu(h->h80.command),
950 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100951 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100953 return true;
954}
955
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100956static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100957{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100958 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100959 int r;
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962 if (unlikely(r != sizeof(*h))) {
963 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100964 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100965 return false;
966 }
967
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100968 r = decode_header(tconn, h, pi);
969 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700970
Philipp Reisner257d0af2011-01-26 12:15:29 +0100971 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700972}
973
Philipp Reisner2451fc32010-08-24 13:43:11 +0200974static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975{
976 int rv;
977
978 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400979 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200980 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981 if (rv) {
982 dev_err(DEV, "local disk flush failed with status %d\n", rv);
983 /* would rather check on EOPNOTSUPP, but that is not reliable.
984 * don't try again for ANY return value != 0
985 * if (rv == -EOPNOTSUPP) */
986 drbd_bump_write_ordering(mdev, WO_drain_io);
987 }
988 put_ldev(mdev);
989 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700990}
991
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:	Epoch event.
 *
 * An epoch is finished (barrier-acked and destroyed or recycled) once it
 * is non-empty, has no active requests left, and has received its barrier
 * number.  Finishing one epoch may cascade into the next one on the list,
 * hence the loop.  EV_CLEANUP suppresses sending the barrier ack.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself (EV_CLEANUP is a modifier flag) */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* can this epoch be finished now? */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending on the network */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* not the newest epoch: unlink and free it,
				 * then continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* newest epoch: reset it for reuse instead */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1063
1064/**
1065 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1066 * @mdev: DRBD device.
1067 * @wo: Write ordering method to try.
1068 */
1069void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1070{
1071 enum write_ordering_e pwo;
1072 static char *write_ordering_str[] = {
1073 [WO_none] = "none",
1074 [WO_drain_io] = "drain",
1075 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076 };
1077
1078 pwo = mdev->write_ordering;
1079 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1081 wo = WO_drain_io;
1082 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1083 wo = WO_none;
1084 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001085 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1087}
1088
/**
 * drbd_submit_ee() - submit a peer request to the local backing device
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:	flag field, see bio->bi_rw
 * @fault_type:	fault-injection category for drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		   const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly-linked (via bi_next) chain of built bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_endio_sec;

	/* prepend to the chain; submission order below is reversed, which
	 * is fine since each bio covers a disjoint sector range */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	/* walk the page chain of the peer request, filling the current bio */
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full: start another one at the
			 * sector we have advanced to */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* all bios built; publish the count before submitting, so the
	 * completion handler can tell when the last one finishes */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; drop every bio built so far */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1180
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001181static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001182 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001183{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001184 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001185
1186 drbd_remove_interval(&mdev->write_requests, i);
1187 drbd_clear_interval(i);
1188
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001189 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001190 if (i->waiting)
1191 wake_up(&mdev->misc_wait);
1192}
1193
/* Handle a P_BARRIER packet from the peer: close the current write epoch
 * and, depending on the write ordering method, either open a fresh epoch
 * or drain/flush before acknowledging. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* matched by dec_unacked() once the barrier ack went out */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* wait for all writes of the old epoch, then flush disk */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch needed (or allocatable): simply wait until the
		 * current one has fully drained */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* install the freshly allocated epoch as the new current one */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1265
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receives @data_size bytes (possibly preceded by an integrity digest)
 * from the data socket into a freshly allocated peer request.
 * Returns the peer request, or NULL on short read, bad parameters,
 * out-of-range sector, allocation failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest size is 0 unless integrity checking is configured
	 * (and the peer speaks protocol >= 87) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	/* the digest was part of the announced size; the payload follows */
	data_size -= dgs;

	/* sanity-check the payload size sent by the peer */
	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload page by page into the peer request's pages */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
					rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* recompute the digest over the received data and compare
		 * against the one the peer sent */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1355
1356/* drbd_drain_block() just takes a data block
1357 * out of the socket input buffer, and discards it.
1358 */
1359static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1360{
1361 struct page *page;
1362 int rr, rv = 1;
1363 void *data;
1364
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001365 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001366 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001367
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001368 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369
1370 data = kmap(page);
1371 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001372 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1374 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001375 if (!signal_pending(current))
1376 dev_warn(DEV,
1377 "short read receiving data: read %d expected %d\n",
1378 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001379 break;
1380 }
1381 data_size -= rr;
1382 }
1383 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001384 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 return rv;
1386}
1387
/* Receive a data reply directly into the pages of a pending local request
 * ("diskless read"): the payload is copied into @req's master bio instead
 * of a peer request.  Returns 1 on success, 0 on short read or digest
 * mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest size is 0 unless integrity checking is configured
	 * (and the peer speaks protocol >= 87) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* fill each segment of the original bio straight from the socket */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* verify the received data against the peer's digest */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1447
/* e_end_resync_block() is called via
 * drbd_process_done_ee() by asender only */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	/* The work item is embedded in the peer request; recover it. */
	struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok;

	/* Resync requests are never entered into the write_requests tree,
	 * so the interval must still be empty here. */
	D_ASSERT(drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* Local write succeeded: mark the range in sync and ack it. */
		drbd_set_in_sync(mdev, sector, peer_req->i.size);
		ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(mdev, sector, peer_req->i.size);

		ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
	}
	/* Balances the inc_unacked() done in recv_resync_read(). */
	dec_unacked(mdev);

	return ok;
}
1472
/* Receive one resync data block from the peer, queue it on sync_ee and
 * submit the local write.  Returns true on successful submission, false
 * on any failure (read error, submit error).  The caller's get_ldev()
 * reference is released here on failure, otherwise in the endio path. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* Make the request visible on sync_ee before submitting it. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* Account resync sectors for the resync throttling heuristic. */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* Undo the list_add above before freeing the peer request. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1508
/* Translate a block_id received from the peer back into a request object.
 * The id is the request pointer we sent out earlier; it is only trusted
 * after verifying that exactly this interval is present in @root.
 * Returns NULL (optionally logging, unless @missing_ok) when not found. */
static struct drbd_request *
find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	/* Validate the untrusted pointer: it must be a local request that
	 * is actually contained in the tree at the claimed sector. */
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1525
/* Handle a P_DATA_REPLY packet: the peer answers one of our read requests
 * with the block contents.  Returns true when the payload was received
 * and applied to the request's bio, false otherwise. */
static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct drbd_request *req;
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);

	/* Look up our original read request under the request lock;
	 * missing_ok = false: a reply without a request is an error. */
	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return false;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	ok = recv_dless_read(mdev, req, sector, data_size);

	if (ok)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return ok;
}
1555
/* Handle a P_RS_DATA_REPLY packet: resync data requested by us arrives.
 * Writes the block to the local disk when possible, otherwise drains the
 * payload from the socket and negatively acks it. */
static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);
	/* Resync replies always carry the ID_SYNCER block id. */
	D_ASSERT(p->block_id == ID_SYNCER);

	if (get_ldev(mdev)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_endio_sec. */
		ok = recv_resync_read(mdev, sector, data_size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not write resync data to local disk.\n");

		/* No local disk: still consume the payload from the socket
		 * to keep the stream in sync, then send a negative ack. */
		ok = drbd_drain_block(mdev, data_size);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
	}

	/* Account received resync sectors (resync speed/throttling). */
	atomic_add(data_size >> 9, &mdev->rs_sect_in);

	return ok;
}
1584
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	/* The work item is embedded in the peer request; recover it. */
	struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	/* Only protocol C sends explicit write acks from here. */
	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync we may answer with P_RS_WRITE_ACK and
			 * additionally mark the range in sync. */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		/* Balances the inc_unacked() done in receive_Data(). */
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		/* single primary: the interval was never inserted */
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1625
Philipp Reisner00d56942011-02-09 18:09:48 +01001626static int e_send_discard_ack(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001628 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisner00d56942011-02-09 18:09:48 +01001629 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 int ok = 1;
1631
Philipp Reisner89e58e72011-01-19 13:12:45 +01001632 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001633 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634
Philipp Reisner87eeee42011-01-19 14:16:30 +01001635 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001636 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1637 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001638 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639
1640 dec_unacked(mdev);
1641
1642 return ok;
1643}
1644
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001645static bool seq_greater(u32 a, u32 b)
1646{
1647 /*
1648 * We assume 32-bit wrap-around here.
1649 * For 24-bit wrap-around, we would have to shift:
1650 * a <<= 8; b <<= 8;
1651 */
1652 return (s32)a - (s32)b > 0;
1653}
1654
1655static u32 seq_max(u32 a, u32 b)
1656{
1657 return seq_greater(a, b) ? a : b;
1658}
1659
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001660static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001661{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001662 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001663
1664 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001665 old_peer_seq = mdev->peer_seq;
1666 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001667 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001668 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001669 wake_up(&mdev->seq_wait);
1670}
1671
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
{
	DEFINE_WAIT(wait);
	unsigned int p_seq;
	long timeout;
	int ret = 0;
	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* We may proceed once our packet is at most one ahead of
		 * the highest sequence number seen so far. */
		if (!seq_greater(packet_seq, mdev->peer_seq + 1))
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* Snapshot peer_seq so we can detect progress made while
		 * we slept without holding the lock. */
		p_seq = mdev->peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		timeout = schedule_timeout(30*HZ);
		spin_lock(&mdev->peer_seq_lock);
		/* Timed out with no progress at all: give up and force a
		 * reconnect instead of stalling the receiver forever. */
		if (timeout == 0 && p_seq == mdev->peer_seq) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
			break;
		}
	}
	finish_wait(&mdev->seq_wait, &wait);
	/* If we are the logically next packet, advance peer_seq ourselves. */
	if (mdev->peer_seq+1 == packet_seq)
		mdev->peer_seq++;
	spin_unlock(&mdev->peer_seq_lock);
	return ret;
}
1724
Lars Ellenberg688593c2010-11-17 22:25:03 +01001725/* see also bio_flags_to_wire()
1726 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1727 * flags and back. We may replicate to other kernel versions. */
1728static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001729{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001730 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1731 (dpf & DP_FUA ? REQ_FUA : 0) |
1732 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1733 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001734}
1735
/* mirrored write */
/* Handle a P_DATA packet: a write mirrored to us by the peer.
 * Reads the payload, performs conflict detection against local writes
 * (two-primaries mode), and submits the write to the local disk.
 * Returns true on success (including the "discarded by flag" case),
 * false on any failure, which triggers a reconnect. */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	int rw = WRITE;
	u32 dp_flags;

	if (!get_ldev(mdev)) {
		/* No local disk: still advance the peer sequence number so
		 * drbd_wait_peer_seq() callers make progress, ... */
		spin_lock(&mdev->peer_seq_lock);
		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
			mdev->peer_seq++;
		spin_unlock(&mdev->peer_seq_lock);

		/* ... negatively ack the write, keep the epoch accounting
		 * consistent, and drain the payload from the socket. */
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size);
	}

	/* get_ldev(mdev) successful.
	 * Corresponding put_ldev done either below (on various errors),
	 * or in drbd_endio_sec, if we successfully submit the data at
	 * the end of this function. */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	/* Map on-the-wire DP_* flags to bio REQ_* flags. */
	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Attach this write to the current epoch. */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	/* I'm the receiver, I do hold a net_cnt reference. */
	if (!mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
	} else {
		/* don't get the req_lock yet,
		 * we may sleep in drbd_wait_peer_seq */
		const int size = peer_req->i.size;
		const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
		DEFINE_WAIT(wait);
		int first;

		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);

		/* conflict detection and handling:
		 * 1. wait on the sequence number,
		 *    in case this data packet overtook ACK packets.
		 * 2. check for conflicting write requests.
		 *
		 * Note: for two_primaries, we are protocol C,
		 * so there cannot be any request that is DONE
		 * but still on the transfer log.
		 *
		 * if no conflicting request is found:
		 *    submit.
		 *
		 * if any conflicting request is found
		 * that has not yet been acked,
		 * AND I have the "discard concurrent writes" flag:
		 *	 queue (via done_ee) the P_DISCARD_ACK; OUT.
		 *
		 * if any conflicting request is found:
		 *	 block the receiver, waiting on misc_wait
		 *	 until no more conflicting requests are there,
		 *	 or we get interrupted (disconnect).
		 *
		 *	 we do not just write after local io completion of those
		 *	 requests, but only after req is done completely, i.e.
		 *	 we wait for the P_DISCARD_ACK to arrive!
		 *
		 *	 then proceed normally, i.e. submit.
		 */
		if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
			goto out_interrupted;

		spin_lock_irq(&mdev->tconn->req_lock);

		first = 1;
		for (;;) {
			struct drbd_interval *i;
			int have_unacked = 0;
			int have_conflict = 0;
			prepare_to_wait(&mdev->misc_wait, &wait,
				TASK_INTERRUPTIBLE);

			/* Look for any local or remote write overlapping
			 * [sector, sector+size) in the interval tree. */
			i = drbd_find_overlap(&mdev->write_requests, sector, size);
			if (i) {
				/* only ALERT on first iteration,
				 * we may be woken up early... */
				if (first)
					dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
					      " new: %llus +%u; pending: %llus +%u\n",
					      current->comm, current->pid,
					      i->local ? "local" : "remote",
					      (unsigned long long)sector, size,
					      (unsigned long long)i->sector, i->size);

				if (i->local) {
					struct drbd_request *req2;

					/* A conflicting local request that still
					 * awaits its network ack counts as
					 * "unacked" for the discard decision. */
					req2 = container_of(i, struct drbd_request, i);
					if (req2->rq_state & RQ_NET_PENDING)
						++have_unacked;
				}
				++have_conflict;
			}
			if (!have_conflict)
				break;

			/* Discard Ack only for the _first_ iteration */
			if (first && discard && have_unacked) {
				dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
				     (unsigned long long)sector);
				inc_unacked(mdev);
				peer_req->w.cb = e_send_discard_ack;
				list_add_tail(&peer_req->w.list, &mdev->done_ee);

				spin_unlock_irq(&mdev->tconn->req_lock);

				/* we could probably send that P_DISCARD_ACK ourselves,
				 * but I don't like the receiver using the msock */

				put_ldev(mdev);
				wake_asender(mdev->tconn);
				finish_wait(&mdev->misc_wait, &wait);
				return true;
			}

			if (signal_pending(current)) {
				spin_unlock_irq(&mdev->tconn->req_lock);
				finish_wait(&mdev->misc_wait, &wait);
				goto out_interrupted;
			}

			/* Indicate to wake up mdev->misc_wait upon completion. */
			i->waiting = true;

			spin_unlock_irq(&mdev->tconn->req_lock);
			if (first) {
				first = 0;
				dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
				     "sec=%llus\n", (unsigned long long)sector);
			} else if (discard) {
				/* we had none on the first iteration.
				 * there must be none now. */
				D_ASSERT(have_unacked == 0);
			}
			/* Sleep until the conflicting request completes,
			 * then re-check under the lock. */
			schedule();
			spin_lock_irq(&mdev->tconn->req_lock);
		}
		finish_wait(&mdev->misc_wait, &wait);

		/* Register our own write for conflict detection against
		 * later local writes; removed again in e_end_block(). */
		drbd_insert_interval(&mdev->write_requests, &peer_req->i);
	}

	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* Roll back: unhook from active_ee and the interval tree. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
1952
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->sync_conf.c_min_rate == 0)
		return 0;

	/* Never throttle an extent that application IO is waiting for
	 * (BME_PRIORITY): that would delay the very IO we yield to. */
	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* Total sectors moved by the backing device (reads + writes),
	 * minus what our own resync submitted (rs_sect_ev). */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	/* More than 64 unaccounted sectors since last check: disk is busy
	 * with application IO; consider throttling. */
	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* Only throttle when we are already syncing faster than the
		 * configured minimum rate. */
		if (dbdt > mdev->sync_conf.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2018
2019
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002020static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2021 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022{
2023 sector_t sector;
2024 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002025 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002027 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002029 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002030
2031 sector = be64_to_cpu(p->sector);
2032 size = be32_to_cpu(p->blksize);
2033
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002034 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2036 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002037 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002038 }
2039 if (sector + (size>>9) > capacity) {
2040 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2041 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002042 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002043 }
2044
2045 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002046 verb = 1;
2047 switch (cmd) {
2048 case P_DATA_REQUEST:
2049 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2050 break;
2051 case P_RS_DATA_REQUEST:
2052 case P_CSUM_RS_REQUEST:
2053 case P_OV_REQUEST:
2054 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2055 break;
2056 case P_OV_REPLY:
2057 verb = 0;
2058 dec_rs_pending(mdev);
2059 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2060 break;
2061 default:
2062 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2063 cmdname(cmd));
2064 }
2065 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 dev_err(DEV, "Can not satisfy peer's read request, "
2067 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002068
Lars Ellenberga821cc42010-09-06 12:31:37 +02002069 /* drain possibly payload */
2070 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071 }
2072
2073 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2074 * "criss-cross" setup, that might cause write-out on some other DRBD,
2075 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002076 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2077 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002079 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080 }
2081
Philipp Reisner02918be2010-08-20 14:35:10 +02002082 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002083 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002084 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002085 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002086 /* application IO, don't drbd_rs_begin_io */
2087 goto submit;
2088
Philipp Reisnerb411b362009-09-25 16:07:19 -07002089 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002090 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002092 /* used in the sector offset progress display */
2093 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094 break;
2095
2096 case P_OV_REPLY:
2097 case P_CSUM_RS_REQUEST:
2098 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002099 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2100 if (!di)
2101 goto out_free_e;
2102
2103 di->digest_size = digest_size;
2104 di->digest = (((char *)di)+sizeof(struct digest_info));
2105
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002106 peer_req->digest = di;
2107 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002108
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002109 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002110 goto out_free_e;
2111
Philipp Reisner02918be2010-08-20 14:35:10 +02002112 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002113 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002114 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002115 /* used in the sector offset progress display */
2116 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002117 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002118 /* track progress, we may need to throttle */
2119 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002120 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002121 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002122 /* drbd_rs_begin_io done when we sent this request,
2123 * but accounting still needs to be done. */
2124 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002125 }
2126 break;
2127
2128 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002129 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002130 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002131 unsigned long now = jiffies;
2132 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002133 mdev->ov_start_sector = sector;
2134 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002135 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2136 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002137 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2138 mdev->rs_mark_left[i] = mdev->ov_left;
2139 mdev->rs_mark_time[i] = now;
2140 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 dev_info(DEV, "Online Verify start sector: %llu\n",
2142 (unsigned long long)sector);
2143 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002144 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002146 break;
2147
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148 default:
2149 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002150 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002152 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153 }
2154
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002155 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2156 * wrt the receiver, but it is not as straightforward as it may seem.
2157 * Various places in the resync start and stop logic assume resync
2158 * requests are processed in order, requeuing this on the worker thread
2159 * introduces a bunch of new code for synchronization between threads.
2160 *
2161 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2162 * "forever", throttling after drbd_rs_begin_io will lock that extent
2163 * for application writes for the same time. For now, just throttle
2164 * here, where the rest of the code expects the receiver to sleep for
2165 * a while, anyways.
2166 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002167
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002168 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2169 * this defers syncer requests for some time, before letting at least
2170 * on request through. The resync controller on the receiving side
2171 * will adapt to the incoming rate accordingly.
2172 *
2173 * We cannot throttle here if remote is Primary/SyncTarget:
2174 * we would also throttle its application reads.
2175 * In that case, throttling is done on the SyncTarget only.
2176 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002177 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2178 schedule_timeout_uninterruptible(HZ/10);
2179 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002180 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002182submit_for_resync:
2183 atomic_add(size >> 9, &mdev->rs_sect_ev);
2184
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002185submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002186 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002187 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002188 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002189 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002191 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002192 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002194 /* don't care for the reason here */
2195 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002196 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002197 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002198 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002199 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2200
Philipp Reisnerb411b362009-09-25 16:07:19 -07002201out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002203 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002204 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205}
2206
/*
 * Auto-resolve a split brain for the "zero primaries" case, according to
 * the configured after-sb-0pri policy.
 *
 * Return: -1 to discard the local data (sync from the peer), 1 to discard
 * the peer's data (sync to the peer), or -100 if no automatic decision
 * was reached (misconfiguration, ASB_DISCONNECT, or an undecided policy).
 */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	/* lowest bit of the bitmap UUID is used as a flag here; presumably
	 * "was primary when the split brain happened" -- see the
	 * younger/older-primary strategies below */
	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	/* amount of changed data on each side; the peer reported its count
	 * in the UI_SIZE slot of the received uuid array */
	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these policies need at least one primary; invalid here */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* neither side changed anything: break the tie via the
			 * DISCARD_CONCURRENT flag (set on one side only --
			 * TODO confirm against drbd_int.h) */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* both sides changed data: stop here if zero-changes was the
		 * configured policy, otherwise fall through to least-changes */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2278
2279static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2280{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002281 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282
Philipp Reisner89e58e72011-01-19 13:12:45 +01002283 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002284 case ASB_DISCARD_YOUNGER_PRI:
2285 case ASB_DISCARD_OLDER_PRI:
2286 case ASB_DISCARD_LEAST_CHG:
2287 case ASB_DISCARD_LOCAL:
2288 case ASB_DISCARD_REMOTE:
2289 dev_err(DEV, "Configuration error.\n");
2290 break;
2291 case ASB_DISCONNECT:
2292 break;
2293 case ASB_CONSENSUS:
2294 hg = drbd_asb_recover_0p(mdev);
2295 if (hg == -1 && mdev->state.role == R_SECONDARY)
2296 rv = hg;
2297 if (hg == 1 && mdev->state.role == R_PRIMARY)
2298 rv = hg;
2299 break;
2300 case ASB_VIOLENTLY:
2301 rv = drbd_asb_recover_0p(mdev);
2302 break;
2303 case ASB_DISCARD_SECONDARY:
2304 return mdev->state.role == R_PRIMARY ? 1 : -1;
2305 case ASB_CALL_HELPER:
2306 hg = drbd_asb_recover_0p(mdev);
2307 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002308 enum drbd_state_rv rv2;
2309
2310 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2312 * we might be here in C_WF_REPORT_PARAMS which is transient.
2313 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002314 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2315 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316 drbd_khelper(mdev, "pri-lost-after-sb");
2317 } else {
2318 dev_warn(DEV, "Successfully gave up primary role.\n");
2319 rv = hg;
2320 }
2321 } else
2322 rv = hg;
2323 }
2324
2325 return rv;
2326}
2327
2328static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2329{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002330 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002331
Philipp Reisner89e58e72011-01-19 13:12:45 +01002332 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002333 case ASB_DISCARD_YOUNGER_PRI:
2334 case ASB_DISCARD_OLDER_PRI:
2335 case ASB_DISCARD_LEAST_CHG:
2336 case ASB_DISCARD_LOCAL:
2337 case ASB_DISCARD_REMOTE:
2338 case ASB_CONSENSUS:
2339 case ASB_DISCARD_SECONDARY:
2340 dev_err(DEV, "Configuration error.\n");
2341 break;
2342 case ASB_VIOLENTLY:
2343 rv = drbd_asb_recover_0p(mdev);
2344 break;
2345 case ASB_DISCONNECT:
2346 break;
2347 case ASB_CALL_HELPER:
2348 hg = drbd_asb_recover_0p(mdev);
2349 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002350 enum drbd_state_rv rv2;
2351
Philipp Reisnerb411b362009-09-25 16:07:19 -07002352 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2353 * we might be here in C_WF_REPORT_PARAMS which is transient.
2354 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002355 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2356 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357 drbd_khelper(mdev, "pri-lost-after-sb");
2358 } else {
2359 dev_warn(DEV, "Successfully gave up primary role.\n");
2360 rv = hg;
2361 }
2362 } else
2363 rv = hg;
2364 }
2365
2366 return rv;
2367}
2368
2369static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2370 u64 bits, u64 flags)
2371{
2372 if (!uuid) {
2373 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2374 return;
2375 }
2376 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2377 text,
2378 (unsigned long long)uuid[UI_CURRENT],
2379 (unsigned long long)uuid[UI_BITMAP],
2380 (unsigned long long)uuid[UI_HISTORY_START],
2381 (unsigned long long)uuid[UI_HISTORY_END],
2382 (unsigned long long)bits,
2383 (unsigned long long)flags);
2384}
2385
2386/*
2387 100 after split brain try auto recover
2388 2 C_SYNC_SOURCE set BitMap
2389 1 C_SYNC_SOURCE use BitMap
2390 0 no Sync
2391 -1 C_SYNC_TARGET use BitMap
2392 -2 C_SYNC_TARGET set BitMap
2393 -100 after split brain, disconnect
2394-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002395-1091 requires proto 91
2396-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397 */
/*
 * Compare our UUID set (mdev->ldev->md.uuid) with the peer's (mdev->p_uuid)
 * and decide the sync direction; the return codes are documented in the
 * table directly above.  *rule_nr is set to the rule that decided, for the
 * caller's log output.  May correct one side's UUIDs in place when it
 * detects a missed "resync finished" event or a lost P_SYNC_UUID packet.
 */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* bit 0 of each UUID is used as a flag, mask it off for comparisons */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	/* both sides freshly created: nothing to sync */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* only we are fresh/blank: full sync from the peer */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* only the peer is fresh/blank: full sync to the peer */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* identical current UUIDs: clean, unless one side missed the
		 * end of the last resync */
		int rct, dc; /* roles at crash time */

		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
			/* peer cleared its bitmap uuid, we still have ours:
			 * we were SyncSource and missed the finished event */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
			/* mirror image of the case above: we were SyncTarget,
			 * the peer missed the resync-finished event; fix up our
			 * cached copy of the peer's uuids */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both were primary at crash time: tie-break */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	/* our current uuid matches the peer's bitmap uuid:
	 * the peer started a resync against us; we are target */
	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* our current uuid is in the peer's history: the peer was sync source
	 * but its last P_SYNC_UUID packet may have been lost */
	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* our current uuid appears somewhere in the peer's history:
	 * our data is older, full sync from the peer */
	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* our bitmap uuid matches the peer's current uuid:
	 * we started a resync against the peer; we are source */
	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* mirror of rule 51: our own lost P_SYNC_UUID, fix our own uuids */
	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	/* the peer's current uuid appears in our history:
	 * the peer's data is older, full sync to the peer */
	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* matching non-zero bitmap uuids: split brain */
	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* any common ancestor in the two histories: split brain */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* no relation whatsoever between the two uuid sets */
	return -1000;
}
2577
2578/* drbd_sync_handshake() returns the new conn state on success, or
2579 CONN_MASK (-1) on failure.
2580 */
/*
 * Decide, from both sides' UUIDs, roles and disk states, whether and in
 * which direction to resync.  Returns the connection state to move to
 * (C_WF_BITMAP_S / C_WF_BITMAP_T / C_CONNECTED), or C_MASK on any failure
 * or unresolved split brain, which makes the caller drop the connection.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	int hg, rule_nr;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;

	/* while negotiating, judge by the disk state we are about to enter */
	mydisk = mdev->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = mdev->new_state_tmp.disk;

	dev_info(DEV, "drbd_sync_handshake:\n");
	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

	/* hg ("handshake grade"): sign = direction, magnitude 2 = full sync,
	 * 100 = split brain, 1000+ = error; see table above drbd_uuid_compare */
	hg = drbd_uuid_compare(mdev, &rule_nr);

	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		dev_alert(DEV, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		/* -1091/-1096 encode the minimum required protocol version */
		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		/* exactly one side is inconsistent: disk state overrides the
		 * uuid verdict; keep "full sync" if it was split brain or
		 * already a full sync */
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		dev_info(DEV, "Becoming sync %s due to disk states.\n",
			 hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(mdev, "initial-split-brain");

	/* try the after-sb-Npri policies; -100 only if always_asbp is set */
	if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
		int pcount = (mdev->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(mdev);
			break;
		case 1:
			hg = drbd_asb_recover_1p(mdev);
			break;
		case 2:
			hg = drbd_asb_recover_2p(mdev);
			break;
		}
		if (abs(hg) < 100) {
			dev_warn(DEV, "Split-Brain detected, %d primaries, "
				 "automatically solved. Sync from %s node\n",
				 pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				dev_warn(DEV, "Doing a full sync, since"
					 " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		/* still unresolved: honor the discard-my-data ("want_lose")
		 * setting, if exactly one side has it (peer's copy is bit 0
		 * of UI_FLAGS) */
		if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			dev_warn(DEV, "Split-Brain detected, manually solved. "
				 "Sync from %s node\n",
				 (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(mdev, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
		/* we are primary with good data but shall become sync target:
		 * resolve per the rr-conflict setting */
		switch (mdev->tconn->net_conf->rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(mdev, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
				 "assumption\n");
		}
	}

	/* dry-run: report what would happen, then abort the handshake */
	if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
		if (hg == 0)
			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		/* full sync: mark all blocks out-of-sync before starting */
		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(mdev)) {
			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
				 drbd_bm_total_weight(mdev));
		}
	}

	return rv;
}
2724
2725/* returns 1 if invalid */
2726static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2727{
2728 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2729 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2730 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2731 return 0;
2732
2733 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2734 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2735 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2736 return 1;
2737
2738 /* everything else is valid if they are equal on both sides. */
2739 if (peer == self)
2740 return 0;
2741
2742 /* everything es is invalid. */
2743 return 1;
2744}
2745
/*
 * Handle a P_PROTOCOL packet: verify that the peer's protocol settings
 * (wire protocol, after-sb policies, want_lose, two-primaries,
 * data-integrity-alg) match our own configuration.
 *
 * Returns true on success; on any mismatch it forces the connection to
 * C_DISCONNECTING and returns false.  Also returns false if reading the
 * trailing integrity-alg string from the socket fails.
 */
static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* all fields arrive in network byte order */
	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	clear_bit(CONN_DRY_RUN, &mdev->flags);

	/* remember that the peer requested a dry-run connect */
	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &mdev->flags);

	if (p_proto != mdev->tconn->net_conf->wire_protocol) {
		dev_err(DEV, "incompatible communication protocols\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
		dev_err(DEV, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
		dev_err(DEV, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
		dev_err(DEV, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* "discard my data" set on both sides makes split-brain recovery
	 * ambiguous */
	if (p_want_lose && mdev->tconn->net_conf->want_lose) {
		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
		dev_err(DEV, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	/* protocol 87+ appends the integrity-alg name as packet payload */
	if (mdev->tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;

		if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* force NUL termination before comparing */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		dev_info(DEV, "data-integrity-alg: %s\n",
			 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2818
2819/* helper function
2820 * input: alg name, feature name
2821 * return: NULL (alg name was "")
2822 * ERR_PTR(error) if something goes wrong
2823 * or the crypto hash ptr, if it worked out ok. */
2824struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2825 const char *alg, const char *name)
2826{
2827 struct crypto_hash *tfm;
2828
2829 if (!alg[0])
2830 return NULL;
2831
2832 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2833 if (IS_ERR(tfm)) {
2834 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2835 alg, name, PTR_ERR(tfm));
2836 return tfm;
2837 }
2838 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2839 crypto_free_hash(tfm);
2840 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2841 return ERR_PTR(-EINVAL);
2842 }
2843 return tfm;
2844}
2845
/* Handle a SyncParam packet: read the peer's resync tuning parameters
 * and (for agreed protocol version >= 88) the verify-alg / csums-alg
 * digest names, allocate matching crypto transforms, and install the new
 * settings under mdev->peer_seq_lock.
 *
 * Returns true on success; returns false on a short receive or on
 * incompatible/invalid settings (after forcing C_DISCONNECTING where
 * appropriate). */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* The on-the-wire struct grew over protocol versions; compute the
	 * largest size this peer may legitimately send. */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* Split the packet into the fixed header part and (apv == 88 only)
	 * a trailing variable-length verify-alg string. */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			/* NOTE(review): if data_size were 0 here, this would
			 * index verify_alg[-1]; presumably apv==88 peers always
			 * send at least one byte — TODO confirm. */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* Peer requests a different verify algorithm: only allowed
		 * outside of the initial handshake; allocate its transform. */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* Same dance for the checksum-based-resync algorithm. */
		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94) {
			/* rate was already stored above; re-stored here
			 * together with the dynamic-resync controller knobs. */
			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			/* Pre-allocate the resync-plan fifo outside the
			 * spinlock; it is swapped in below. */
			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size   = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2997
Philipp Reisnerb411b362009-09-25 16:07:19 -07002998/* warn if the arguments differ by more than 12.5% */
2999static void warn_if_differ_considerably(struct drbd_conf *mdev,
3000 const char *s, sector_t a, sector_t b)
3001{
3002 sector_t d;
3003 if (a == 0 || b == 0)
3004 return;
3005 d = (a > b) ? (a - b) : (b - a);
3006 if (d > (a>>3) || d > (b>>3))
3007 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3008 (unsigned long long)a, (unsigned long long)b);
3009}
3010
/* Handle a P_SIZES packet: record the peer's backing-device and
 * user-requested sizes, refuse connects that would shrink a device with
 * usable data, (re)determine our own device size, and notify the peer
 * if our capacity differs from what it reported.
 * Returns true on success, false on fatal incompatibility (after
 * forcing C_DISCONNECTING) or on a sizing error. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);   /* peer's backing device size */
	p_usize = be64_to_cpu(p->u_size);  /* peer's user-requested size */

	/* Neither side has backing storage: nothing to replicate. */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember the old value so we can roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			/* undo the u_size change made above */
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* Note whether the lower-level device size changed under us. */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3114
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003115static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3116 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003118 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003120 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003121
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3123
3124 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3125 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3126
3127 kfree(mdev->p_uuid);
3128 mdev->p_uuid = p_uuid;
3129
3130 if (mdev->state.conn < C_CONNECTED &&
3131 mdev->state.disk < D_INCONSISTENT &&
3132 mdev->state.role == R_PRIMARY &&
3133 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3134 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3135 (unsigned long long)mdev->ed_uuid);
3136 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003137 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 }
3139
3140 if (get_ldev(mdev)) {
3141 int skip_initial_sync =
3142 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003143 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3145 (p_uuid[UI_FLAGS] & 8);
3146 if (skip_initial_sync) {
3147 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3148 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003149 "clear_n_write from receive_uuids",
3150 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3152 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3153 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3154 CS_VERBOSE, NULL);
3155 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003156 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 }
3158 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003159 } else if (mdev->state.disk < D_INCONSISTENT &&
3160 mdev->state.role == R_PRIMARY) {
3161 /* I am a diskless primary, the peer just created a new current UUID
3162 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003163 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 }
3165
3166 /* Before we test for the disk state, we should wait until an eventually
3167 ongoing cluster wide state change is finished. That is important if
3168 we are primary and are detaching from our disk. We need to see the
3169 new disk state... */
3170 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3171 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003172 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3173
3174 if (updated_uuids)
3175 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003177 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003178}
3179
3180/**
3181 * convert_state() - Converts the peer's view of the cluster state to our point of view
3182 * @ps: The state as seen by the peer.
3183 */
3184static union drbd_state convert_state(union drbd_state ps)
3185{
3186 union drbd_state ms;
3187
3188 static enum drbd_conns c_tab[] = {
3189 [C_CONNECTED] = C_CONNECTED,
3190
3191 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3192 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3193 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3194 [C_VERIFY_S] = C_VERIFY_T,
3195 [C_MASK] = C_MASK,
3196 };
3197
3198 ms.i = ps.i;
3199
3200 ms.conn = c_tab[ps.conn];
3201 ms.peer = ps.role;
3202 ms.role = ps.peer;
3203 ms.pdsk = ps.disk;
3204 ms.disk = ps.pdsk;
3205 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3206
3207 return ms;
3208}
3209
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003210static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3211 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003213 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003215 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003216
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217 mask.i = be32_to_cpu(p->mask);
3218 val.i = be32_to_cpu(p->val);
3219
Philipp Reisner25703f82011-02-07 14:35:25 +01003220 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3222 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003223 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003224 }
3225
3226 mask = convert_state(mask);
3227 val = convert_state(val);
3228
3229 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3230
3231 drbd_send_sr_reply(mdev, rv);
3232 drbd_md_sync(mdev);
3233
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003234 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235}
3236
/* Handle a P_STATE packet: reconcile the peer's reported state with our
 * own, possibly run the resync handshake, and apply the combined state
 * transition.  Returns true on success, false when the connection must
 * be torn down. */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still negotiating its disk, derive its real
	 * disk state from the UUID flags it sent earlier. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* Sample our state; most of the work below runs unlocked, so we
	 * re-check under the lock before committing and retry if our
	 * state changed in the meantime. */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from the handshake means "could not agree". */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3389
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003390static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3391 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003392{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003393 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003394
3395 wait_event(mdev->misc_wait,
3396 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003397 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003398 mdev->state.conn < C_CONNECTED ||
3399 mdev->state.disk < D_NEGOTIATING);
3400
3401 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3402
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 /* Here the _drbd_uuid_ functions are right, current should
3404 _not_ be rotated into the history */
3405 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3406 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3407 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3408
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003409 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410 drbd_start_resync(mdev, C_SYNC_TARGET);
3411
3412 put_ldev(mdev);
3413 } else
3414 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3415
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003416 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003417}
3418
/**
 * receive_bitmap_plain
 *
 * Receive one uncompressed bitmap packet worth of words and merge them
 * into our bitmap, advancing the transfer context @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
		     unsigned long *buffer, struct bm_xfer_ctx *c)
{
	/* How many words of the bitmap remain, capped at one packet. */
	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
	unsigned want = num_words * sizeof(long);
	int err;

	/* The payload size must match exactly what we expect next. */
	if (want != data_size) {
		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv(mdev->tconn, buffer, want);
	if (err != want) {
		/* a short (but non-negative) read is still a protocol error */
		if (err >= 0)
			err = -EIO;
		return err;
	}

	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);

	/* advance the transfer context; clamp the bit offset at the end */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
3455
/**
 * recv_bm_rle_bits
 *
 * Decode one P_COMPRESSED_BITMAP payload: a VLI (variable length integer)
 * encoded sequence of run lengths, alternating between "clear" and "set"
 * runs of bits.  Only set-runs are applied to the bitmap; clear runs are
 * skipped (the bitmap is merged, not overwritten).
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* sliding window of up to 64 not-yet-decoded bits */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* absolute start bit of current run */
	unsigned long e;			/* absolute end bit of current run */
	int toggle = DCBP_get_start(p);		/* whether the first run is a "set" run */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* reject runs that would write past the end of the bitmap */
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		if (have < bits) {
			/* decoder claims more bits than the window held: corrupt stream */
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* drop the consumed bits, then refill the window back toward 64 bits */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* nonzero (another packet needed) unless we decoded up to the last bit */
	return (s != c->bm_bits);
}
3520
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003521/**
3522 * decode_bitmap_c
3523 *
3524 * Return 0 when done, 1 when another iteration is needed, and a negative error
3525 * code upon failure.
3526 */
3527static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528decode_bitmap_c(struct drbd_conf *mdev,
3529 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003530 struct bm_xfer_ctx *c,
3531 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532{
3533 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003534 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003535
3536 /* other variants had been implemented for evaluation,
3537 * but have been dropped as this one turned out to be "best"
3538 * during all our tests. */
3539
3540 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3541 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003542 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003543}
3544
3545void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3546 const char *direction, struct bm_xfer_ctx *c)
3547{
3548 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003549 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3551 + c->bm_words * sizeof(long);
3552 unsigned total = c->bytes[0] + c->bytes[1];
3553 unsigned r;
3554
3555 /* total can not be zero. but just in case: */
3556 if (total == 0)
3557 return;
3558
3559 /* don't report if not compressed */
3560 if (total >= plain)
3561 return;
3562
3563 /* total < plain. check for overflow, still */
3564 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3565 : (1000 * total / plain);
3566
3567 if (r > 1000)
3568 r = 1000;
3569
3570 r = 1000 - r;
3571 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3572 "total %u; compression: %u.%u%%\n",
3573 direction,
3574 c->bytes[1], c->packets[1],
3575 c->bytes[0], c->packets[0],
3576 total, r/10, r % 10);
3577}
3578
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* consume P_BITMAP / P_COMPRESSED_BITMAP packets until the whole
	 * bitmap arrived; each iteration handles one packet and then reads
	 * the header of the next one. */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			/* rebuild header + payload contiguously, as the decoder
			 * reads encoding info from the header part of *p */
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* stats: index 1 counts plain packets, index 0 compressed ones */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		/* err: 0 = transfer complete, 1 = more packets needed, <0 = error */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* sync target: answer with our own bitmap, then start the resync */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	/* sync source: both bitmaps are exchanged now, kick off the resync */
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);
	return ok;
}
3681
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003682static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3683 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684{
3685 /* TODO zero copy sink :) */
3686 static char sink[128];
3687 int size, want, r;
3688
Philipp Reisner02918be2010-08-20 14:35:10 +02003689 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3690 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691
Philipp Reisner02918be2010-08-20 14:35:10 +02003692 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003693 while (size > 0) {
3694 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003695 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003696 if (!expect(r > 0))
3697 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698 size -= r;
3699 }
3700 return size == 0;
3701}
3702
/* P_UNPLUG_REMOTE: the peer just submitted a burst of requests; no payload,
 * nothing to do locally beyond acknowledging the TCP data promptly. */
static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
				unsigned int data_size)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(mdev->tconn->data.socket);

	return true;
}
3712
/* P_OUT_OF_SYNC: the peer tells us a block range is out of sync; mark it in
 * our bitmap.  Only expected while we are (about to be) behind the peer. */
static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;

	/* any other connection state indicates a state-machine problem;
	 * complain loudly but still record the out-of-sync range */
	switch (mdev->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(mdev->state.conn));
	}

	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return true;
}
3732
/* Handler signature for packets received on the data socket. */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);

/* Dispatch-table entry for one packet type on the data socket. */
struct data_cmd {
	int expect_payload;	/* nonzero if data beyond pkt_size is allowed */
	size_t pkt_size;	/* bytes (incl. header) to read before dispatch */
	drbd_cmd_handler_f function;
};

/* Per-packet-type dispatch table, indexed by enum drbd_packet. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]	    = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]	    = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	/* anything missing from this table is in
	 * the asender_tbl, see get_asender_cmd */
	[P_MAX_CMD]	    = { 0, 0, NULL },
};
3769
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776
/* Main receive loop of the receiver thread: read a packet header from the
 * data socket, validate the packet type and payload expectation against
 * drbd_cmd_handler[], read the sub-header, and dispatch to the handler.
 * On any protocol violation or handler failure, force C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (!drbd_recv_header(tconn, &pi))
			goto err_out;

		/* reject packet types we have no handler for */
		if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* a payload beyond the fixed sub-header is only legal for
		 * packet types that declare expect_payload */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* the 'if (0)' keeps the error label out of the normal flow;
	 * it is reached only via the gotos above */
	if (0) {
	err_out:
		drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR));
	}
}
3823
Philipp Reisnera21e9292011-02-08 15:08:49 +01003824void drbd_flush_workqueue(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825{
3826 struct drbd_wq_barrier barr;
3827
3828 barr.w.cb = w_prev_work_done;
Philipp Reisnera21e9292011-02-08 15:08:49 +01003829 barr.w.mdev = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003830 init_completion(&barr.done);
Philipp Reisnera21e9292011-02-08 15:08:49 +01003831 drbd_queue_work(&mdev->tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832 wait_for_completion(&barr.done);
3833}
3834
/* Tear down the connection: stop the asender, close the sockets, run the
 * per-volume cleanup for every volume, then move the connection state to
 * C_UNCONNECTED (or all the way to C_STANDALONE if the admin asked to
 * disconnect). */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	union drbd_state os, ns;
	int rv = SS_UNKNOWN_ERROR;

	/* nothing to tear down if we never left standalone */
	if (tconn->volume0->state.conn == C_STANDALONE)
		return;

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume cleanup; see drbd_disconnected() below */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	spin_lock_irq(&tconn->req_lock);
	os = tconn->volume0->state;
	if (os.conn >= C_UNCONNECTED) {
		/* Do not restart in case we are C_DISCONNECTING */
		ns.i = os.i;
		ns.conn = C_UNCONNECTED;
		rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL);
	}
	spin_unlock_irq(&tconn->req_lock);
	/* NOTE(review): rv is assigned but never examined afterwards —
	 * presumably intentional (errors are logged by _drbd_set_state);
	 * confirm before removing. */

	if (os.conn == C_DISCONNECTING) {
		/* wait until nobody references the net config anymore,
		 * then free authentication state and the config itself */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE));
	}
}
3872
/* Per-volume cleanup after the connection was lost; invoked through
 * idr_for_each() from drbd_disconnect() for each volume of the connection.
 * @vnr: volume number (unused here), @p: the volume's drbd_conf,
 * @data: the drbd_tconn (unused here).
 * Always returns 0 so the idr iteration visits every volume. */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	del_timer(&mdev->request_timer);

	/* make sure syncer is stopped and w_resume_next_sg queued */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee(). If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	/* the peer's UUIDs are meaningless once the connection is gone */
	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	if (!is_susp(mdev->state))
		tl_clear(mdev);

	drbd_md_sync(mdev);

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* as Primary with a fencing policy, try to outdate the lost peer */
	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
3966
3967/*
3968 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3969 * we can agree on is stored in agreed_pro_version.
3970 *
3971 * feature flags and the reserved array should be enough room for future
3972 * enhancements of the handshake protocol, and possible plugins...
3973 *
3974 * for now, they are expected to be zero, but ignored.
3975 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003976static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003978 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003979 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003980 int ok;
3981
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003982 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3983 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003984 return 0; /* interrupted. not ok. */
3985 }
3986
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003987 if (tconn->data.socket == NULL) {
3988 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989 return 0;
3990 }
3991
3992 memset(p, 0, sizeof(*p));
3993 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3994 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003995 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3996 &p->head, sizeof(*p), 0);
3997 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998 return ok;
3999}
4000
/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* payload size of the handshake packet (everything after the header) */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int rv;

	rv = drbd_send_handshake(tconn);
	if (!rv)
		return 0;

	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		return 0;

	/* the very first packet on the wire must be the handshake */
	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
		     cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	rv = drbd_recv(tconn, &p->head.payload, expect);

	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	/* convert from network byte order before comparing */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* very old peers sent only protocol_min; treat max == 0 as "exactly min" */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* fail permanently if the version ranges do not overlap */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support */
	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4067
4068#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004069static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070{
4071 dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4072 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004073 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074}
4075#else
4076#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004077
4078/* Return value:
4079 1 - auth succeeded,
4080 0 - failed, try again (network error),
4081 -1 - auth failed, don't try again.
4082*/
4083
/* CRAM-HMAC challenge-response handshake with the peer. Both sides prove
 * knowledge of the shared secret without transmitting it: each side sends
 * a random challenge and verifies the HMAC the peer computes over it.
 * Return value (see comment above): 1 ok, 0 retry, -1 permanent failure. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;		/* HMAC buffer: first our reply to the peer's
					 * challenge, later reused to receive the
					 * peer's reply to ours */
	char *right_response = NULL;	/* locally computed HMAC we expect back */
	char *peers_ch = NULL;		/* challenge received from the peer */
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	/* Key the HMAC transform with the configured shared secret. */
	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* Phase 1: send our random challenge. */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Phase 2: receive the peer's challenge. */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Bound the attacker-controlled payload size before allocating. */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* Phase 3: HMAC the peer's challenge and send the result back. */
	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	/* Phase 4: receive the peer's HMAC over our challenge
	 * (reusing the "response" buffer). */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* Phase 5: compute what the peer should have sent and compare. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): memcmp is not constant-time; a timing side channel
	 * on the comparison is theoretically possible - consider a
	 * constant-time compare. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditional frees are safe here. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4226#endif
4227
4228int drbdd_init(struct drbd_thread *thi)
4229{
Philipp Reisner392c8802011-02-09 10:33:31 +01004230 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231 int h;
4232
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004233 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234
4235 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004236 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004238 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004239 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240 }
4241 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004242 conn_warn(tconn, "Discarding network configuration.\n");
4243 drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244 }
4245 } while (h == 0);
4246
4247 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004248 if (get_net_conf(tconn)) {
4249 drbdd(tconn);
4250 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004251 }
4252 }
4253
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004254 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004255
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004256 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 return 0;
4258}
4259
4260/* ********* acknowledge sender ******** */
4261
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004262static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004264 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004265
4266 int retcode = be32_to_cpu(p->retcode);
4267
4268 if (retcode >= SS_SUCCESS) {
4269 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4270 } else {
4271 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4272 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4273 drbd_set_st_err_str(retcode), retcode);
4274 }
4275 wake_up(&mdev->state_wait);
4276
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004277 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278}
4279
/* Answer a P_PING keep-alive from the peer with a P_PING_ACK. */
static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return drbd_send_ping_ack(mdev);

}
4285
/* The peer answered our keep-alive: drop back from the short ping
 * timeout to the regular idle receive timeout (ping-int). */
static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	/* restore idle timeout */
	mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
	/* only the first ack after the flag was cleared wakes waiters */
	if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
		wake_up(&mdev->misc_wait);

	return true;
}
4295
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004296static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004298 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004299 sector_t sector = be64_to_cpu(p->sector);
4300 int blksize = be32_to_cpu(p->blksize);
4301
Philipp Reisner31890f42011-01-19 14:12:51 +01004302 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303
4304 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4305
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004306 if (get_ldev(mdev)) {
4307 drbd_rs_complete_io(mdev, sector);
4308 drbd_set_in_sync(mdev, sector, blksize);
4309 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4310 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4311 put_ldev(mdev);
4312 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004314 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004316 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004317}
4318
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004319static int
4320validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4321 struct rb_root *root, const char *func,
4322 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323{
4324 struct drbd_request *req;
4325 struct bio_and_error m;
4326
Philipp Reisner87eeee42011-01-19 14:16:30 +01004327 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004328 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004330 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004331 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332 }
4333 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004334 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335
4336 if (m.bio)
4337 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004338 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339}
4340
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004341static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004343 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344 sector_t sector = be64_to_cpu(p->sector);
4345 int blksize = be32_to_cpu(p->blksize);
4346 enum drbd_req_event what;
4347
4348 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4349
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004350 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 drbd_set_in_sync(mdev, sector, blksize);
4352 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004353 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004355 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004357 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004358 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359 break;
4360 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004361 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004362 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004363 break;
4364 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004365 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004366 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367 break;
4368 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004369 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004370 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371 break;
4372 default:
4373 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004374 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 }
4376
4377 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004378 &mdev->write_requests, __func__,
4379 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380}
4381
/* Handle P_NEG_ACK: the peer failed to store a write. For resync writes
 * (ID_SYNCER) mark the range as failed; for application writes feed
 * NEG_ACKED into the request state machine. Under protocols A and B the
 * request may legitimately be gone already (missing_ok). */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		/* still remember that this range is out of sync on the peer */
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4414
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004415static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004416{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004417 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418 sector_t sector = be64_to_cpu(p->sector);
4419
4420 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4421 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4422 (unsigned long long)sector, be32_to_cpu(p->blksize));
4423
4424 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004425 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004426 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427}
4428
/* Handle P_NEG_RS_DREPLY (peer could not serve a resync read) and
 * P_RS_CANCEL (peer cancelled the resync request). Both complete the
 * resync I/O; only the former marks the range as failed. */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: both cases just complete the I/O */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4459
/* Handle P_BARRIER_ACK: the peer confirmed a barrier, so release the
 * corresponding section of the transfer log. */
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));

	/* If we are in Ahead mode and no application writes are in flight,
	 * arm the timer that switches us back to a real resync source.
	 * test_and_set_bit ensures the timer is started only once.
	 * NOTE(review): the flag is kept in current_epoch->flags here -
	 * confirm that is intended rather than mdev->flags. */
	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4475
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004476static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004477{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004478 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004479 struct drbd_work *w;
4480 sector_t sector;
4481 int size;
4482
4483 sector = be64_to_cpu(p->sector);
4484 size = be32_to_cpu(p->blksize);
4485
4486 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4487
4488 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4489 drbd_ov_oos_found(mdev, sector, size);
4490 else
4491 ov_oos_print(mdev);
4492
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004493 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004494 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004495
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496 drbd_rs_complete_io(mdev, sector);
4497 dec_rs_pending(mdev);
4498
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004499 --mdev->ov_left;
4500
4501 /* let's advance progress step marks only for every other megabyte */
4502 if ((mdev->ov_left & 0x200) == 0x200)
4503 drbd_advance_rs_marks(mdev, mdev->ov_left);
4504
4505 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004506 w = kmalloc(sizeof(*w), GFP_NOIO);
4507 if (w) {
4508 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004509 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004510 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511 } else {
4512 dev_err(DEV, "kmalloc(w) failed.");
4513 ov_oos_print(mdev);
4514 drbd_resync_finished(mdev);
4515 }
4516 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004517 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004518 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519}
4520
/* Intentional no-op handler: consumes packets (e.g. P_DELAY_PROBE)
 * that are received on the meta socket but not acted upon here. */
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return true;
}
4525
/* Dispatch entry for one asender (meta socket) packet type:
 * the expected on-wire size and the handler that processes it. */
struct asender_cmd {
	size_t pkt_size;
	int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
};
4530
4531static struct asender_cmd *get_asender_cmd(int cmd)
4532{
4533 static struct asender_cmd asender_tbl[] = {
4534 /* anything missing from this table is in
4535 * the drbd_cmd_handler (drbd_default_handler) table,
4536 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004537 [P_PING] = { sizeof(struct p_header), got_Ping },
4538 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4540 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4541 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4542 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4543 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4544 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4545 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4546 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4547 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4548 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4549 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004550 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004551 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552 [P_MAX_CMD] = { 0, NULL },
4553 };
4554 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4555 return NULL;
4556 return &asender_tbl[cmd];
4557}
4558
/* idr_for_each() callback: flush the done_ee list of one volume.
 * Returns the negation of drbd_process_done_ee(); a non-zero return
 * aborts the idr iteration. */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = p;	/* no cast needed from void * in C */

	return !drbd_process_done_ee(mdev);
}
4564
4565static int _check_ee_empty(int vnr, void *p, void *data)
4566{
4567 struct drbd_conf *mdev = (struct drbd_conf *)p;
4568 struct drbd_tconn *tconn = mdev->tconn;
4569 int not_empty;
4570
4571 spin_lock_irq(&tconn->req_lock);
4572 not_empty = !list_empty(&mdev->done_ee);
4573 spin_unlock_irq(&tconn->req_lock);
4574
4575 return not_empty;
4576}
4577
4578static int tconn_process_done_ee(struct drbd_tconn *tconn)
4579{
4580 int not_empty, err;
4581
4582 do {
4583 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4584 flush_signals(current);
4585 err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
4586 if (err)
4587 return err;
4588 set_bit(SIGNAL_ASENDER, &tconn->flags);
4589 not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
4590 } while (not_empty);
4591
4592 return 0;
4593}
4594
/* The asender thread: services the meta socket. It sends pings, flushes
 * completed epoch entries, and receives/dispatches all acknowledgement
 * packets via the asender_tbl handlers. Packets are read in two phases:
 * first the fixed-size header, then (after decoding) the rest of the
 * command up to cmd->pkt_size. */
int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct p_header *h = &tconn->meta.rbuf.header;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = h;		/* current receive position within rbuf */
	int received = 0;	/* bytes accumulated so far */
	int expect = sizeof(struct p_header);	/* bytes needed before acting */
	int ping_timeout_active = 0;	/* short timeout while a PingAck is due */

	current->policy = SCHED_RR;  /* Make this a realtime task! */
	current->rt_priority = 2;    /* more important than all other tasks */

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);
		/* Somebody (timeout below, or other code) asked for a ping:
		 * send it and switch to the short ping timeout. */
		if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
			if (!drbd_send_ping(tconn->volume0)) {
				conn_err(tconn, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			tconn->meta.socket->sk->sk_rcvtimeo =
				tconn->net_conf->ping_timeo*HZ/10;
			ping_timeout_active = 1;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (!tconn->net_conf->no_cork)
			drbd_tcp_cork(tconn->meta.socket);
		if (tconn_process_done_ee(tconn))
			goto reconnect;
		/* but unconditionally uncork unless disabled */
		if (!tconn->net_conf->no_cork)
			drbd_tcp_uncork(tconn->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
		clear_bit(SIGNAL_ASENDER, &tconn->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS	 (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf += rv;
		} else if (rv == 0) {
			conn_err(tconn, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(tconn->last_received,
				jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				conn_err(tconn, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			/* idle timeout expired: probe the peer with a ping */
			set_bit(SEND_PING, &tconn->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		/* Phase 1 complete: a full header but no command decoded yet.
		 * Decode it and learn how many bytes the full packet needs. */
		if (received == expect && cmd == NULL) {
			if (!decode_header(tconn, h, &pi))
				goto reconnect;
			cmd = get_asender_cmd(pi.cmd);
			if (unlikely(cmd == NULL)) {
				conn_err(tconn, "unknown command %d on meta (l: %d)\n",
					pi.cmd, pi.size);
				goto disconnect;
			}
			expect = cmd->pkt_size;
			if (pi.size != expect - sizeof(struct p_header)) {
				conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
					pi.cmd, pi.size);
				goto reconnect;
			}
		}
		/* Phase 2 complete: full packet received, dispatch it and
		 * reset the receive state for the next header. */
		if (received == expect) {
			tconn->last_received = jiffies;
			if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
				goto reconnect;

			/* the idle_timeout (ping-int)
			 * has been restored in got_PingAck() */
			if (cmd == get_asender_cmd(P_PING_ACK))
				ping_timeout_active = 0;

			buf = h;
			received = 0;
			expect = sizeof(struct p_header);
			cmd = NULL;
		}
	}

	/* the if (0) blocks make the error labels unreachable except via
	 * goto, while sharing the common cleanup path below */
	if (0) {
reconnect:
		drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE));
	}
	if (0) {
disconnect:
		drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	clear_bit(SIGNAL_ASENDER, &tconn->flags);

	conn_info(tconn, "asender terminated\n");

	return 0;
}