blob: 262e5d97991c6b174e6158ff7bd1075364879519 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded header of a received packet: the command, the payload size,
 * and vnr (presumably the volume number — confirm against header parsing). */
struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};
56
/* Return values of drbd_may_finish_epoch() (forward-declared below):
 * whether the epoch object is still live, was destroyed, or was recycled. */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010065static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070066
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Philipp Reisner00d56942011-02-09 18:09:48 +010068static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
Philipp Reisnerb411b362009-09-25 16:07:19 -070070
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
/* If at least n pages are linked at head, detach the first n and return
 * them as a NULL-terminated chain.  Otherwise leave *head untouched and
 * return NULL.  Locking is the responsibility of the caller. */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *cur;
	struct page *next = NULL;

	BUG_ON(!n);
	BUG_ON(!head);

	cur = *head;
	if (!cur)
		return NULL;

	for (;;) {
		next = page_chain_next(cur);
		if (--n == 0)
			break;	/* cur is now the n-th page */
		if (!next)
			/* chain shorter than n: use none of it */
			return NULL;
		cur = next;
	}

	/* terminate the chain we are about to hand out */
	set_page_private(cur, 0);
	/* return the old head; the remainder becomes the new head */
	cur = *head;
	*head = next;
	return cur;
}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
/* Walk a (usually short) "private" page chain to its last page.
 * If @len is non-NULL, also report the chain length through it.
 * May be used outside of locks, before handing the chain to
 * page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	while ((next = page_chain_next(page)) != NULL) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
/* Prepend the chain [chain_first .. chain_last] to *head.
 * Locking is the caller's responsibility. */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify that chain_last really is the tail of chain_first */
	struct page *tail = page_chain_tail(chain_first, NULL);
	BUG_ON(tail != chain_last);
#endif

	/* link the old head behind the new chain's tail */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
151
/*
 * Grab @number pages, first from the global drbd_pp_pool, otherwise
 * page by page via alloc_page(GFP_TRY).  Returns a full chain (linked
 * through page->private) or NULL; a partial allocation is handed back
 * to the global pool before returning NULL.
 */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
196
/* Move finished entries from mdev->net_ee onto @to_be_freed.
 * Called under mdev->tconn->req_lock (see drbd_kick_lo_and_reclaim_net
 * and drbd_process_done_ee). */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
214
215static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
216{
217 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100218 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219
Philipp Reisner87eeee42011-01-19 14:16:30 +0100220 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100222 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700223
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100224 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
225 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226}
227
/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private, or NULL if !@retry
 * (or a signal arrived) and no pages could be obtained.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* return done net_ee pages to the pool; may satisfy us below */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		/* sleep until drbd_pp_free() wakes drbd_pp_wait */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages only on success */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
277
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system.
 * @is_net selects which in-use counter (pp_in_use_by_net vs pp_in_use)
 * the pages are subtracted from. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* if the pool already holds plenty, give the pages back to the kernel */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* wake a possible waiter in drbd_pp_alloc() */
	wake_up(&drbd_pp_wait);
}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
/*
 * Allocate a drbd_peer_request plus enough pages for @data_size bytes.
 * Returns NULL on fault injection, mempool exhaustion, or when the
 * page allocation fails (retried only if @gfp_mask contains __GFP_WAIT).
 */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	/* the descriptor itself must not come from highmem */
	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
364
/* Release all resources of @peer_req (digest, pages, descriptor).
 * @is_net selects the page-accounting counter, see drbd_pp_free(). */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
375
376int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
377{
378 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100379 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700380 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200381 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700382
Philipp Reisner87eeee42011-01-19 14:16:30 +0100383 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700384 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100385 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700386
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100387 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
388 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 count++;
390 }
391 return count;
392}
393
394
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 * Returns nonzero while all callbacks (and the initial connection state
 * check) succeeded.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		/* once one callback failed, the rest are invoked with
		 * cancel != 0, but their results still fold into ok */
		ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}
430
/* Wait (uninterruptibly) until @head becomes empty.
 * Caller must hold mdev->tconn->req_lock; it is dropped around
 * io_schedule() and re-taken before each re-check and on return. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
/* Lock-taking wrapper around _drbd_wait_ee_list_empty(). */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
452
/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly.
 * On failure, *what names the failing step and the error code is
 * returned; *newsock is left NULL. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);	/* backlog of 5 */
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	/* sock_create_lite leaves ops unset; inherit from the listener */
	(*newsock)->ops = sock->ops;

out:
	return err;
}
483
/* One sock_recvmsg() call into the kernel buffer @buf.
 * @flags == 0 defaults to MSG_WAITALL | MSG_NOSIGNAL (block for @size bytes).
 * Returns the sock_recvmsg() result. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* temporarily allow kernel addresses to pass the user-copy checks */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
505
/* Receive exactly @size bytes from the data socket of @tconn.
 * On any short read, error, or peer shutdown the connection state is
 * forced to C_BROKEN_PIPE.  Returns the last sock_recvmsg() result
 * (equal to @size only on full success). */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	/* anything but a complete read means the pipe is broken */
	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/* Actively connect to the peer configured in tconn->net_conf.
 * Returns the connected socket or NULL.  Transient failures (timeout,
 * refused, unreachable, signal) just return NULL so the caller may retry;
 * other errors additionally force the state to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
/* Passively wait for an incoming connection on the configured address.
 * Returns the established socket or NULL.  Errors other than timeout or
 * signal (-EAGAIN/-EINTR/-ERESTARTSYS) are logged and force the state
 * to C_DISCONNECTING. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	/* the listening socket is only needed to accept one connection */
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
704
/* Send the "first packet" @cmd (bare header, no payload) on @sock. */
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}
711
/* Receive the "first packet" header from @sock and return its command.
 * Returns 0xffff on a short read or when the DRBD magic does not match. */
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
/* Per-volume setup once the connection is established: reset sequence
 * counters, select the state mutex, and send the initial sync-param,
 * sizes, uuids and state packets.  Returns 0 on success (note the
 * inverted return at the end). */
static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	/* before protocol 100 there is only the connection-wide state mutex */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);


	return !ok;
}
770
/*
 * Establish both sockets (data and meta/"msock") to the peer, run the
 * handshake and optional authentication, then kick off per-volume setup.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *	  no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		/* Actively try to connect; whichever of the two sockets is
		 * still missing gets the next successful connection. */
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* Give the peer a moment, then re-verify both
			 * sockets before leaving the retry loop. */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* Passive side: accept an incoming connection and sort it
		 * into sock/msock by the first packet the peer sent. */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* randomize back-off to break symmetric races */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* handshake done: relax the data socket timeouts again */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	/* per-volume setup; idr_for_each returns non-zero if any
	 * drbd_connected() invocation failed */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
940
/* Decode a received on-wire header (legacy h80 or extended h95 layout,
 * distinguished by magic) into @pi.  Returns false and logs the raw
 * fields when neither magic matches. */
static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		pi->cmd = be16_to_cpu(h->h80.command);
		pi->size = be16_to_cpu(h->h80.length);
		pi->vnr = 0;
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		pi->cmd = be16_to_cpu(h->h95.command);
		/* h95 length is 24 bits; mask off the high byte */
		pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
		pi->vnr = 0;
	} else {
		conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
		    be32_to_cpu(h->h80.magic),
		    be16_to_cpu(h->h80.command),
		    be16_to_cpu(h->h80.length));
		return false;
	}
	return true;
}
960
/* Blocking read of one packet header from the data socket into the
 * connection's receive buffer, then decode it into @pi.  Also refreshes
 * tconn->last_received for the keep-alive logic.  Returns the (bool)
 * result of decode_header(), or false on a short read. */
static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_header *h = &tconn->data.rbuf.header;
	int r;

	r = drbd_recv(tconn, h, sizeof(*h));
	if (unlikely(r != sizeof(*h))) {
		/* a pending signal explains the short read; don't warn then */
		if (!signal_pending(current))
			conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
		return false;
	}

	r = decode_header(tconn, h, pi);
	tconn->last_received = jiffies;

	return r;
}
978
/* Issue a flush to the local backing device if the current write
 * ordering policy requires it.  On failure, permanently degrade the
 * write ordering method to "drain" instead of retrying. */
static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	/* get_ldev() takes a reference on the backing device; only flush
	 * if the local disk is attached */
	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}
996
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished (barrier ack sent, epoch destroyed or recycled)
 * once it is non-empty, has no active requests left, and has received
 * its barrier number.  Finishing one epoch may cascade to its successor
 * in the list, hence the loop.  Returns FE_STILL_LIVE, FE_DESTROYED or
 * FE_RECYCLED (first transition wins).
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* EV_CLEANUP is an orthogonal flag; strip it to dispatch
		 * on the base event */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the spinlock while sending over the
				 * network; reacquire before touching the list */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* a finished non-current epoch is unlinked and
				 * freed; continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is reset in place for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1068
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 *
 * The effective method can only ever be downgraded (flush > drain > none),
 * and is further capped by the backing device's no_disk_flush /
 * no_disk_drain configuration.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	/* never upgrade beyond the current method */
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}
1093
1094/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001095 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001096 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001097 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001098 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001099 *
1100 * May spread the pages to multiple bios,
1101 * depending on bio_add_page restrictions.
1102 *
1103 * Returns 0 if all bios have been submitted,
1104 * -ENOMEM if we could not allocate enough bios,
1105 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1106 * single page to an empty bio (which should never happen and likely indicates
1107 * that the lower level IO stack is in some way broken). This has been observed
1108 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001109 */
1110/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001111int drbd_submit_peer_request(struct drbd_conf *mdev,
1112 struct drbd_peer_request *peer_req,
1113 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001114{
1115 struct bio *bios = NULL;
1116 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001117 struct page *page = peer_req->pages;
1118 sector_t sector = peer_req->i.sector;
1119 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001120 unsigned n_bios = 0;
1121 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001122 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001123
1124 /* In most cases, we will only need one bio. But in case the lower
1125 * level restrictions happen to be different at this offset on this
1126 * side than those of the sending peer, we may need to submit the
1127 * request in more than one bio. */
1128next_bio:
1129 bio = bio_alloc(GFP_NOIO, nr_pages);
1130 if (!bio) {
1131 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1132 goto fail;
1133 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001134 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001135 bio->bi_sector = sector;
1136 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001137 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001138 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001139 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001140
1141 bio->bi_next = bios;
1142 bios = bio;
1143 ++n_bios;
1144
1145 page_chain_for_each(page) {
1146 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1147 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001148 /* A single page must always be possible!
1149 * But in case it fails anyways,
1150 * we deal with it, and complain (below). */
1151 if (bio->bi_vcnt == 0) {
1152 dev_err(DEV,
1153 "bio_add_page failed for len=%u, "
1154 "bi_vcnt=0 (bi_sector=%llu)\n",
1155 len, (unsigned long long)bio->bi_sector);
1156 err = -ENOSPC;
1157 goto fail;
1158 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001159 goto next_bio;
1160 }
1161 ds -= len;
1162 sector += len >> 9;
1163 --nr_pages;
1164 }
1165 D_ASSERT(page == NULL);
1166 D_ASSERT(ds == 0);
1167
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001168 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001169 do {
1170 bio = bios;
1171 bios = bios->bi_next;
1172 bio->bi_next = NULL;
1173
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001174 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001175 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001176 return 0;
1177
1178fail:
1179 while (bios) {
1180 bio = bios;
1181 bios = bios->bi_next;
1182 bio_put(bio);
1183 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001184 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001185}
1186
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001187static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001188 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001189{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001190 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001191
1192 drbd_remove_interval(&mdev->write_requests, i);
1193 drbd_clear_interval(i);
1194
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001195 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001196 if (i->waiting)
1197 wake_up(&mdev->misc_wait);
1198}
1199
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch, possibly finish it, and open a fresh epoch for the
 * writes that follow.  Returns true on success, false on a bogus
 * write_ordering value. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* balanced by dec_unacked() in drbd_may_finish_epoch() */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* wait for in-flight writes, then flush the backing device */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch object needed (or allocatable); just wait for
		 * the current epoch to drain and reuse it */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* install the freshly allocated epoch as the new current one */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1271
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receive one data block from the peer into a freshly allocated peer
 * request: optional integrity digest first, then the payload into a
 * page chain, then verify the digest.  Returns the peer request, or
 * NULL on short read, bad parameters, out-of-range sector, allocation
 * failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest size is 0 unless data integrity checking is configured
	 * (and the peer speaks protocol >= 87) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	/* the digest was part of the advertised payload size */
	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload one page at a time into the page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
					rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1361
/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 *
 * Uses a single scratch page repeatedly.  Returns 1 when the whole
 * block was drained, 0 on a short read. */
static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
{
	struct page *page;
	int rr, rv = 1;
	void *data;

	if (!data_size)
		return true;

	page = drbd_pp_alloc(mdev, 1, 1);

	data = kmap(page);
	while (data_size) {
		rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
		if (rr != min_t(int, data_size, PAGE_SIZE)) {
			rv = 0;
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data: read %d expected %d\n",
					rr, min_t(int, data_size, PAGE_SIZE));
			break;
		}
		data_size -= rr;
	}
	kunmap(page);
	drbd_pp_free(mdev, page, 0);
	return rv;
}
1393
/* Receive a "diskless read" data reply directly into the pages of the
 * request's master bio (optional integrity digest first, verified at
 * the end).  Returns 1 on success, 0 on short read or digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest present only with integrity checking and protocol >= 87 */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* fill each bio segment straight from the socket */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1453
/* e_end_resync_block() is called via
 * drbd_process_done_ee() by asender only
 *
 * Completion work for a resync write: acknowledge success (and mark the
 * range in sync) or report failure to the peer.  Returns the result of
 * drbd_send_ack(). */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok;

	/* resync requests are never in the write_requests interval tree */
	D_ASSERT(drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(mdev, sector, peer_req->i.size);
		ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(mdev, sector, peer_req->i.size);

		ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
	}
	/* balances the inc_unacked() done when the request was queued */
	dec_unacked(mdev);

	return ok;
}
1479
/* Receive one resync data block from the peer and submit it as a WRITE
 * to the local disk.
 *
 * The caller must hold a local-disk reference (get_ldev); per the
 * __releases(local) annotation it is released here on the failure path
 * (put_ldev below), or later by the endio path after a successful
 * submission.  Returns true on successful submission, false otherwise. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	/* reads the payload off the wire into a peer request */
	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* account resync sectors; read by drbd_rs_should_slow_down() */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1515
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001516static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001517find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1518 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001519{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001520 struct drbd_request *req;
1521
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001522 /* Request object according to our peer */
1523 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001524 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001525 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001526 if (!missing_ok) {
1527 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1528 (unsigned long)id, (unsigned long long)sector);
1529 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001530 return NULL;
1531}
1532
/* Handle P_DATA_REPLY: the peer answered one of our read requests.
 * Look the original request up via the echoed block_id, then let
 * recv_dless_read() copy the payload into the request's master bio.
 * Returns nonzero on success, false if the request cannot be found or
 * the read-in fails. */
static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct drbd_request *req;
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return false;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	ok = recv_dless_read(mdev, req, sector, data_size);

	if (ok)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return ok;
}
1562
/* Handle P_RS_DATA_REPLY: a resync data block we requested has arrived.
 * If we have a local disk, write it out via recv_resync_read();
 * otherwise drain the payload from the socket and send a negative ack.
 * Either way, account the sectors for resync throughput bookkeeping. */
static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);
	/* resync replies always carry the ID_SYNCER block id */
	D_ASSERT(p->block_id == ID_SYNCER);

	if (get_ldev(mdev)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		ok = recv_resync_read(mdev, sector, data_size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not write resync data to local disk.\n");

		/* consume the payload so the data stream stays in sync */
		ok = drbd_drain_block(mdev, data_size);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
	}

	atomic_add(data_size >> 9, &mdev->rs_sect_in);

	return ok;
}
1591
/* Worker callback: resubmit a local write that had been postponed due
 * to a write conflict (queued by restart_conflicting_writes()).
 *
 * The request must still carry RQ_POSTPONED; its master bio and start
 * time are salvaged under the request lock, the old request is
 * terminated via DISCARD_WRITE, and a fresh request is built from the
 * same bio.  Returns 1 on success, 0 if the RQ_POSTPONED expectation
 * fails. */
static int w_restart_write(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct bio *bio;
	unsigned long start_time;
	unsigned long flags;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	if (!expect(req->rq_state & RQ_POSTPONED)) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		return 0;
	}
	bio = req->master_bio;
	start_time = req->start_time;
	/* Postponed requests will not have their master_bio completed! */
	__req_mod(req, DISCARD_WRITE, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	/* keep retrying until the request is accepted */
	while (__drbd_make_request(mdev, bio, start_time))
		/* retry */ ;
	return 1;
}
1615
/* Queue a w_restart_write for every postponed local write overlapping
 * [sector, sector + size) that is no longer pending locally.
 * Called with mdev->tconn->req_lock held (see e_end_block()). */
static void restart_conflicting_writes(struct drbd_conf *mdev,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		/* peer requests in the tree are not ours to restart */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* only requests that are postponed AND done locally */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		if (expect(list_empty(&req->w.list))) {
			req->w.mdev = mdev;
			req->w.cb = w_restart_write;
			drbd_queue_work(&mdev->tconn->data.work, &req->w);
		}
	}
}
1636
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 *
 * Completion of a mirrored write received from the peer: send the ack
 * required by the wire protocol (protocol C only), remove the interval
 * from write-conflict detection, and drop the epoch reference.
 * Returns the drbd_send_ack() result (nonzero on success). */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during resync, a successful write may also mark
			 * the block in sync (P_RS_WRITE_ACK) */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1680
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001681static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001683 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001684 struct drbd_peer_request *peer_req =
1685 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001686 int ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001687
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001688 ok = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689 dec_unacked(mdev);
1690
1691 return ok;
1692}
1693
/* Worker callback: ack a discarded conflicting peer write. */
static int e_send_discard_write(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_DISCARD_WRITE);
}
1698
1699static int e_send_retry_write(struct drbd_work *w, int unused)
1700{
1701 struct drbd_tconn *tconn = w->mdev->tconn;
1702
1703 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1704 P_RETRY_WRITE : P_DISCARD_WRITE);
1705}
1706
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001707static bool seq_greater(u32 a, u32 b)
1708{
1709 /*
1710 * We assume 32-bit wrap-around here.
1711 * For 24-bit wrap-around, we would have to shift:
1712 * a <<= 8; b <<= 8;
1713 */
1714 return (s32)a - (s32)b > 0;
1715}
1716
1717static u32 seq_max(u32 a, u32 b)
1718{
1719 return seq_greater(a, b) ? a : b;
1720}
1721
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001722static bool need_peer_seq(struct drbd_conf *mdev)
1723{
1724 struct drbd_tconn *tconn = mdev->tconn;
1725
1726 /*
1727 * We only need to keep track of the last packet_seq number of our peer
1728 * if we are in dual-primary mode and we have the discard flag set; see
1729 * handle_write_conflicts().
1730 */
1731 return tconn->net_conf->two_primaries &&
1732 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1733}
1734
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001735static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001736{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001737 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001738
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001739 if (need_peer_seq(mdev)) {
1740 spin_lock(&mdev->peer_seq_lock);
1741 old_peer_seq = mdev->peer_seq;
1742 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1743 spin_unlock(&mdev->peer_seq_lock);
1744 if (old_peer_seq != peer_seq)
1745 wake_up(&mdev->seq_wait);
1746 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001747}
1748
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	/* peer-seq tracking only matters in dual-primary + discard mode */
	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* we are "next" once peer_seq - 1 is not ahead of the
		 * highest sequence number seen so far */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* sleep until update_peer_seq() wakes us, the ping
		 * timeout expires, or a signal arrives; drop the
		 * spinlock across the schedule */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1805
Lars Ellenberg688593c2010-11-17 22:25:03 +01001806/* see also bio_flags_to_wire()
1807 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1808 * flags and back. We may replicate to other kernel versions. */
1809static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001810{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001811 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1812 (dpf & DP_FUA ? REQ_FUA : 0) |
1813 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1814 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001815}
1816
/* Fail (NEG_ACK) every postponed local write overlapping
 * [sector, sector + size).
 *
 * Called with mdev->tconn->req_lock held.  The lock is dropped while
 * completing a master bio and re-taken afterwards, so the overlap scan
 * restarts from the top (goto repeat) after each completion, since the
 * tree may have changed meanwhile. */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		/* only local requests can be postponed */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
1841
/* Resolve conflicts between a freshly received peer write and any
 * overlapping requests (local or remote) in the write_requests tree.
 *
 * Called with mdev->tconn->req_lock held (see receive_Data()).
 * NOTE(review): drbd_wait_misc() sleeps; presumably it drops and
 * re-takes req_lock internally -- confirm against its definition.
 *
 * Returns:
 *   0        the peer request may be submitted; its interval remains in
 *            the write_requests tree
 *   -ENOENT  the peer request was discarded (we hold the discard flag);
 *            an ack worker has been queued for the asender
 *   other    error; the interval has been removed again */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		/* skip the interval we just inserted ourselves */
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be discarded; otherwise,
			 * it will be retried once all overlapping requests
			 * have completed.
			 */
			bool discard = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  discard ? "local" : "remote");

			/* the ack is sent from the asender; dec_unacked()
			 * happens in e_send_ack() */
			inc_unacked(mdev);
			peer_req->w.cb = discard ? e_send_discard_write :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request will be discarded or
				 * retried. Requests that are discarded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

 out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
1949
/* mirrored write
 *
 * receive_Data(): process a P_DATA packet, i.e. a write mirrored from
 * the peer.  Reads the payload into a peer request, enforces peer_seq
 * ordering, handles dual-primary write conflicts, accounts the request
 * in the current epoch, acks according to the wire protocol, and
 * submits the write to the local disk.  Returns true on success,
 * false to trigger a re-connect. */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err;


	if (!get_ldev(mdev)) {
		/* No local disk: still honor sequence ordering, drain
		 * the payload so the stream stays in sync, and send a
		 * negative ack. */
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size) && err == 0;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the current write epoch */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	if (mdev->tconn->net_conf->two_primaries) {
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&mdev->tconn->req_lock);
		err = handle_write_conflicts(mdev, peer_req);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			if (err == -ENOENT) {
				/* request discarded; ack is queued for
				 * the asender */
				put_ldev(mdev);
				return true;
			}
			goto out_interrupted;
		}
	} else
		spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2058
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * Returns 1 if resync for @sector should be throttled, 0 otherwise. */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->sync_conf.c_min_rate == 0)
		return 0;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* device activity in both directions, minus what our own resync
	 * submissions account for (rs_sect_ev) */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only while we are faster than the configured
		 * minimum sync rate */
		if (dbdt > mdev->sync_conf.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2124
2125
/* Handle a block read request from the peer.  Covers application reads
 * (P_DATA_REQUEST), resync reads (P_RS_DATA_REQUEST), checksum based
 * resync (P_CSUM_RS_REQUEST) and online verify (P_OV_REQUEST / P_OV_REPLY).
 * Allocates a peer request, queues it on mdev->read_ee and submits the
 * local disk read; the per-command w.cb callback sends the reply when the
 * read completes.  Returns true on success, false on failure (the caller
 * is then expected to drop the connection). */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* sanity check: size must be positive, 512-byte aligned and bounded */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	/* sanity check: request must not reach beyond the end of the device */
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* We can only serve reads while our local disk is D_UP_TO_DATE.
	 * Otherwise send the matching negative ack and drain any payload
	 * still sitting in the socket. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possible payload */
		return drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest bytes live right behind the struct, one allocation */
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		/* receive the peer's digest (the packet payload) */
		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first P_OV_REQUEST of a verify run: initialize the
		 * progress marks (peers >= protocol 90 only) */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
			cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2312
/* After-split-brain auto-recovery policy for "zero remaining primaries".
 * Returns 1 or -1 to pick a sync direction (positive: sync from this node,
 * negative: sync from the peer -- matching the table above
 * drbd_uuid_compare()), or -100 if no automatic decision is possible.
 * NOTE(review): the lowest bit of the bitmap UUID appears to record which
 * node was primary -- confirm the encoding against the UUID writers. */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	/* changed-block counts; the peer transmits its count in the
	 * UI_SIZE slot of the received uuid array */
	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* only meaningful for after-sb-1p / after-sb-2p */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie-break: the side that observed the concurrent
			 * connect attempt yields */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* only stop here if discard-zero-changes was the actual
		 * policy; younger/older fall-throughs continue below */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2384
2385static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2386{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002387 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002388
Philipp Reisner89e58e72011-01-19 13:12:45 +01002389 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002390 case ASB_DISCARD_YOUNGER_PRI:
2391 case ASB_DISCARD_OLDER_PRI:
2392 case ASB_DISCARD_LEAST_CHG:
2393 case ASB_DISCARD_LOCAL:
2394 case ASB_DISCARD_REMOTE:
2395 dev_err(DEV, "Configuration error.\n");
2396 break;
2397 case ASB_DISCONNECT:
2398 break;
2399 case ASB_CONSENSUS:
2400 hg = drbd_asb_recover_0p(mdev);
2401 if (hg == -1 && mdev->state.role == R_SECONDARY)
2402 rv = hg;
2403 if (hg == 1 && mdev->state.role == R_PRIMARY)
2404 rv = hg;
2405 break;
2406 case ASB_VIOLENTLY:
2407 rv = drbd_asb_recover_0p(mdev);
2408 break;
2409 case ASB_DISCARD_SECONDARY:
2410 return mdev->state.role == R_PRIMARY ? 1 : -1;
2411 case ASB_CALL_HELPER:
2412 hg = drbd_asb_recover_0p(mdev);
2413 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002414 enum drbd_state_rv rv2;
2415
2416 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2418 * we might be here in C_WF_REPORT_PARAMS which is transient.
2419 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002420 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2421 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 drbd_khelper(mdev, "pri-lost-after-sb");
2423 } else {
2424 dev_warn(DEV, "Successfully gave up primary role.\n");
2425 rv = hg;
2426 }
2427 } else
2428 rv = hg;
2429 }
2430
2431 return rv;
2432}
2433
2434static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2435{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002436 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437
Philipp Reisner89e58e72011-01-19 13:12:45 +01002438 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002439 case ASB_DISCARD_YOUNGER_PRI:
2440 case ASB_DISCARD_OLDER_PRI:
2441 case ASB_DISCARD_LEAST_CHG:
2442 case ASB_DISCARD_LOCAL:
2443 case ASB_DISCARD_REMOTE:
2444 case ASB_CONSENSUS:
2445 case ASB_DISCARD_SECONDARY:
2446 dev_err(DEV, "Configuration error.\n");
2447 break;
2448 case ASB_VIOLENTLY:
2449 rv = drbd_asb_recover_0p(mdev);
2450 break;
2451 case ASB_DISCONNECT:
2452 break;
2453 case ASB_CALL_HELPER:
2454 hg = drbd_asb_recover_0p(mdev);
2455 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002456 enum drbd_state_rv rv2;
2457
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2459 * we might be here in C_WF_REPORT_PARAMS which is transient.
2460 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002461 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2462 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002463 drbd_khelper(mdev, "pri-lost-after-sb");
2464 } else {
2465 dev_warn(DEV, "Successfully gave up primary role.\n");
2466 rv = hg;
2467 }
2468 } else
2469 rv = hg;
2470 }
2471
2472 return rv;
2473}
2474
2475static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2476 u64 bits, u64 flags)
2477{
2478 if (!uuid) {
2479 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2480 return;
2481 }
2482 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2483 text,
2484 (unsigned long long)uuid[UI_CURRENT],
2485 (unsigned long long)uuid[UI_BITMAP],
2486 (unsigned long long)uuid[UI_HISTORY_START],
2487 (unsigned long long)uuid[UI_HISTORY_END],
2488 (unsigned long long)bits,
2489 (unsigned long long)flags);
2490}
2491
/*
  100	after split brain, try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091	requires proto 91
-1096	requires proto 96
 */
2504static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2505{
2506 u64 self, peer;
2507 int i, j;
2508
2509 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2510 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2511
2512 *rule_nr = 10;
2513 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2514 return 0;
2515
2516 *rule_nr = 20;
2517 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2518 peer != UUID_JUST_CREATED)
2519 return -2;
2520
2521 *rule_nr = 30;
2522 if (self != UUID_JUST_CREATED &&
2523 (peer == UUID_JUST_CREATED || peer == (u64)0))
2524 return 2;
2525
2526 if (self == peer) {
2527 int rct, dc; /* roles at crash time */
2528
2529 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2530
Philipp Reisner31890f42011-01-19 14:12:51 +01002531 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002532 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002533
2534 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2535 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2536 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2537 drbd_uuid_set_bm(mdev, 0UL);
2538
2539 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2540 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2541 *rule_nr = 34;
2542 } else {
2543 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2544 *rule_nr = 36;
2545 }
2546
2547 return 1;
2548 }
2549
2550 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2551
Philipp Reisner31890f42011-01-19 14:12:51 +01002552 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002553 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002554
2555 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2556 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2557 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2558
2559 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2560 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2561 mdev->p_uuid[UI_BITMAP] = 0UL;
2562
2563 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2564 *rule_nr = 35;
2565 } else {
2566 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2567 *rule_nr = 37;
2568 }
2569
2570 return -1;
2571 }
2572
2573 /* Common power [off|failure] */
2574 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2575 (mdev->p_uuid[UI_FLAGS] & 2);
2576 /* lowest bit is set when we were primary,
2577 * next bit (weight 2) is set when peer was primary */
2578 *rule_nr = 40;
2579
2580 switch (rct) {
2581 case 0: /* !self_pri && !peer_pri */ return 0;
2582 case 1: /* self_pri && !peer_pri */ return 1;
2583 case 2: /* !self_pri && peer_pri */ return -1;
2584 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002585 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002586 return dc ? -1 : 1;
2587 }
2588 }
2589
2590 *rule_nr = 50;
2591 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2592 if (self == peer)
2593 return -1;
2594
2595 *rule_nr = 51;
2596 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2597 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002598 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002599 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2600 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2601 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602 /* The last P_SYNC_UUID did not get though. Undo the last start of
2603 resync as sync source modifications of the peer's UUIDs. */
2604
Philipp Reisner31890f42011-01-19 14:12:51 +01002605 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002606 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002607
2608 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2609 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002610
2611 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2612 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2613
Philipp Reisnerb411b362009-09-25 16:07:19 -07002614 return -1;
2615 }
2616 }
2617
2618 *rule_nr = 60;
2619 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2620 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2621 peer = mdev->p_uuid[i] & ~((u64)1);
2622 if (self == peer)
2623 return -2;
2624 }
2625
2626 *rule_nr = 70;
2627 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2628 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2629 if (self == peer)
2630 return 1;
2631
2632 *rule_nr = 71;
2633 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2634 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002635 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002636 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2637 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2638 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002639 /* The last P_SYNC_UUID did not get though. Undo the last start of
2640 resync as sync source modifications of our UUIDs. */
2641
Philipp Reisner31890f42011-01-19 14:12:51 +01002642 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002643 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002644
2645 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2646 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2647
Philipp Reisner4a23f262011-01-11 17:42:17 +01002648 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002649 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2650 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2651
2652 return 1;
2653 }
2654 }
2655
2656
2657 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002658 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002659 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2660 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2661 if (self == peer)
2662 return 2;
2663 }
2664
2665 *rule_nr = 90;
2666 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2667 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2668 if (self == peer && self != ((u64)0))
2669 return 100;
2670
2671 *rule_nr = 100;
2672 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2673 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2674 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2675 peer = mdev->p_uuid[j] & ~((u64)1);
2676 if (self == peer)
2677 return -100;
2678 }
2679 }
2680
2681 return -1000;
2682}
2683
2684/* drbd_sync_handshake() returns the new conn state on success, or
2685 CONN_MASK (-1) on failure.
2686 */
2687static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2688 enum drbd_disk_state peer_disk) __must_hold(local)
2689{
2690 int hg, rule_nr;
2691 enum drbd_conns rv = C_MASK;
2692 enum drbd_disk_state mydisk;
2693
2694 mydisk = mdev->state.disk;
2695 if (mydisk == D_NEGOTIATING)
2696 mydisk = mdev->new_state_tmp.disk;
2697
2698 dev_info(DEV, "drbd_sync_handshake:\n");
2699 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2700 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2701 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2702
2703 hg = drbd_uuid_compare(mdev, &rule_nr);
2704
2705 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2706
2707 if (hg == -1000) {
2708 dev_alert(DEV, "Unrelated data, aborting!\n");
2709 return C_MASK;
2710 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002711 if (hg < -1000) {
2712 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002713 return C_MASK;
2714 }
2715
2716 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2717 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2718 int f = (hg == -100) || abs(hg) == 2;
2719 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2720 if (f)
2721 hg = hg*2;
2722 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2723 hg > 0 ? "source" : "target");
2724 }
2725
Adam Gandelman3a11a482010-04-08 16:48:23 -07002726 if (abs(hg) == 100)
2727 drbd_khelper(mdev, "initial-split-brain");
2728
Philipp Reisner89e58e72011-01-19 13:12:45 +01002729 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002730 int pcount = (mdev->state.role == R_PRIMARY)
2731 + (peer_role == R_PRIMARY);
2732 int forced = (hg == -100);
2733
2734 switch (pcount) {
2735 case 0:
2736 hg = drbd_asb_recover_0p(mdev);
2737 break;
2738 case 1:
2739 hg = drbd_asb_recover_1p(mdev);
2740 break;
2741 case 2:
2742 hg = drbd_asb_recover_2p(mdev);
2743 break;
2744 }
2745 if (abs(hg) < 100) {
2746 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2747 "automatically solved. Sync from %s node\n",
2748 pcount, (hg < 0) ? "peer" : "this");
2749 if (forced) {
2750 dev_warn(DEV, "Doing a full sync, since"
2751 " UUIDs where ambiguous.\n");
2752 hg = hg*2;
2753 }
2754 }
2755 }
2756
2757 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002758 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002760 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002761 hg = 1;
2762
2763 if (abs(hg) < 100)
2764 dev_warn(DEV, "Split-Brain detected, manually solved. "
2765 "Sync from %s node\n",
2766 (hg < 0) ? "peer" : "this");
2767 }
2768
2769 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002770 /* FIXME this log message is not correct if we end up here
2771 * after an attempted attach on a diskless node.
2772 * We just refuse to attach -- well, we drop the "connection"
2773 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002774 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 drbd_khelper(mdev, "split-brain");
2776 return C_MASK;
2777 }
2778
2779 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2780 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2781 return C_MASK;
2782 }
2783
2784 if (hg < 0 && /* by intention we do not use mydisk here. */
2785 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002786 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787 case ASB_CALL_HELPER:
2788 drbd_khelper(mdev, "pri-lost");
2789 /* fall through */
2790 case ASB_DISCONNECT:
2791 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2792 return C_MASK;
2793 case ASB_VIOLENTLY:
2794 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2795 "assumption\n");
2796 }
2797 }
2798
Philipp Reisner89e58e72011-01-19 13:12:45 +01002799 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002800 if (hg == 0)
2801 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2802 else
2803 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2804 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2805 abs(hg) >= 2 ? "full" : "bit-map based");
2806 return C_MASK;
2807 }
2808
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809 if (abs(hg) >= 2) {
2810 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002811 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2812 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002813 return C_MASK;
2814 }
2815
2816 if (hg > 0) { /* become sync source. */
2817 rv = C_WF_BITMAP_S;
2818 } else if (hg < 0) { /* become sync target */
2819 rv = C_WF_BITMAP_T;
2820 } else {
2821 rv = C_CONNECTED;
2822 if (drbd_bm_total_weight(mdev)) {
2823 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2824 drbd_bm_total_weight(mdev));
2825 }
2826 }
2827
2828 return rv;
2829}
2830
2831/* returns 1 if invalid */
2832static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2833{
2834 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2835 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2836 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2837 return 0;
2838
2839 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2840 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2841 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2842 return 1;
2843
2844 /* everything else is valid if they are equal on both sides. */
2845 if (peer == self)
2846 return 0;
2847
2848 /* everything es is invalid. */
2849 return 1;
2850}
2851
/* Handle a P_PROTOCOL packet: compare the peer's connection settings
 * (wire protocol, after-split-brain policies, want-lose flag,
 * two-primaries, data-integrity algorithm) against our local net_conf.
 * Any mismatch forces the connection into C_DISCONNECTING.
 * Returns true on success, false on receive error or incompatibility. */
static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* all fields arrive in network byte order */
	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* mirror the peer's dry-run flag into our device flags */
	clear_bit(CONN_DRY_RUN, &mdev->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &mdev->flags);

	if (p_proto != mdev->tconn->net_conf->wire_protocol) {
		dev_err(DEV, "incompatible communication protocols\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
		dev_err(DEV, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
		dev_err(DEV, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
		dev_err(DEV, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* want_lose marks this node willing to lose its data; both sides
	 * setting it would make split-brain resolution ambiguous */
	if (p_want_lose && mdev->tconn->net_conf->want_lose) {
		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
		dev_err(DEV, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	if (mdev->tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;

		/* protocol 87+ appends the integrity alg name as payload;
		 * data_size bytes follow on the wire */
		if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* defensively NUL-terminate what came off the wire */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		dev_info(DEV, "data-integrity-alg: %s\n",
			my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2924
2925/* helper function
2926 * input: alg name, feature name
2927 * return: NULL (alg name was "")
2928 * ERR_PTR(error) if something goes wrong
2929 * or the crypto hash ptr, if it worked out ok. */
2930struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2931 const char *alg, const char *name)
2932{
2933 struct crypto_hash *tfm;
2934
2935 if (!alg[0])
2936 return NULL;
2937
2938 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2939 if (IS_ERR(tfm)) {
2940 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2941 alg, name, PTR_ERR(tfm));
2942 return tfm;
2943 }
2944 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2945 crypto_free_hash(tfm);
2946 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2947 return ERR_PTR(-EINVAL);
2948 }
2949 return tfm;
2950}
2951
/* Handle P_SYNC_PARAM (and its _89/_95 extensions): read the peer's resync
 * tunables and, for protocol >= 88, the verify/csums algorithm names.
 * New digest transforms are allocated up front; all of sync_conf and the
 * transforms are then committed atomically under peer_seq_lock (which
 * serializes against drbd_nl_syncer_conf()).
 * Returns true on success, false on receive error or incompatibility. */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* maximum on-the-wire size depends on the agreed protocol version */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
			+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* split the packet into the version-dependent fixed header and a
	 * trailing variable part (only apv == 88 carries payload there) */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size = packet_size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88: the verify alg name arrives as separate
			 * payload of data_size bytes */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* a changed verify-alg is only acceptable during the
		 * parameter exchange phase of connect */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94) {
			/* apv 95+ adds the dynamic resync-rate controller
			 * parameters; resize the fifo plan if needed */
			mdev->sync_conf.rate = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		/* swap in the (possibly NULL) new plan buffer; the old one
		 * is freed here */
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
3103
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104/* warn if the arguments differ by more than 12.5% */
3105static void warn_if_differ_considerably(struct drbd_conf *mdev,
3106 const char *s, sector_t a, sector_t b)
3107{
3108 sector_t d;
3109 if (a == 0 || b == 0)
3110 return;
3111 d = (a > b) ? (a - b) : (b - a);
3112 if (d > (a>>3) || d > (b>>3))
3113 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3114 (unsigned long long)a, (unsigned long long)b);
3115}
3116
/* Handle a P_SIZES packet: record the peer's backing-device and requested
 * sizes, sanity-check them against our own, possibly resize the local
 * device, and trigger a resync after online growth when appropriate.
 * Returns true on success, false on a fatal size conflict (forces
 * C_DISCONNECTING) or a device-size error. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* neither side has backing storage: nothing to replicate */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember the old value so we can roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			/* restore the previous user-requested size */
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* detect a change of the lower-level device's capacity */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3220
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003221static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3222 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003224 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003225 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003226 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3229
3230 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3231 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3232
3233 kfree(mdev->p_uuid);
3234 mdev->p_uuid = p_uuid;
3235
3236 if (mdev->state.conn < C_CONNECTED &&
3237 mdev->state.disk < D_INCONSISTENT &&
3238 mdev->state.role == R_PRIMARY &&
3239 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3240 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3241 (unsigned long long)mdev->ed_uuid);
3242 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003243 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 }
3245
3246 if (get_ldev(mdev)) {
3247 int skip_initial_sync =
3248 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003249 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003250 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3251 (p_uuid[UI_FLAGS] & 8);
3252 if (skip_initial_sync) {
3253 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3254 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003255 "clear_n_write from receive_uuids",
3256 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3258 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3259 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3260 CS_VERBOSE, NULL);
3261 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003262 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003263 }
3264 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003265 } else if (mdev->state.disk < D_INCONSISTENT &&
3266 mdev->state.role == R_PRIMARY) {
3267 /* I am a diskless primary, the peer just created a new current UUID
3268 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003269 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003270 }
3271
3272 /* Before we test for the disk state, we should wait until an eventually
3273 ongoing cluster wide state change is finished. That is important if
3274 we are primary and are detaching from our disk. We need to see the
3275 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003276 mutex_lock(mdev->state_mutex);
3277 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003278 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003279 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3280
3281 if (updated_uuids)
3282 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003283
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003284 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003285}
3286
3287/**
3288 * convert_state() - Converts the peer's view of the cluster state to our point of view
3289 * @ps: The state as seen by the peer.
3290 */
3291static union drbd_state convert_state(union drbd_state ps)
3292{
3293 union drbd_state ms;
3294
3295 static enum drbd_conns c_tab[] = {
3296 [C_CONNECTED] = C_CONNECTED,
3297
3298 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3299 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3300 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3301 [C_VERIFY_S] = C_VERIFY_T,
3302 [C_MASK] = C_MASK,
3303 };
3304
3305 ms.i = ps.i;
3306
3307 ms.conn = c_tab[ps.conn];
3308 ms.peer = ps.role;
3309 ms.role = ps.peer;
3310 ms.pdsk = ps.disk;
3311 ms.disk = ps.pdsk;
3312 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3313
3314 return ms;
3315}
3316
/* Handle a P_STATE_CHG_REQ / P_CONN_ST_CHG_REQ packet: the peer asks us
 * to perform a state change on its behalf.  The request is converted to
 * our point of view, applied, and the resulting status code is sent back.
 * Always returns true (errors are reported to the peer in the reply). */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* if we hold the state mutex ourselves and we are the side that
	 * discards concurrent requests, reject the peer's attempt */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* translate the peer's view (its role/disk) into ours */
	mask = convert_state(mask);
	val = convert_state(val);

	if (cmd == P_CONN_ST_CHG_REQ) {
		/* connection-wide change: apply on the tconn level */
		rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
		conn_send_sr_reply(mdev->tconn, rv);
	} else {
		rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
		drbd_send_sr_reply(mdev, rv);
	}

	drbd_md_sync(mdev);

	return true;
}
3348
/* Handle a P_STATE packet: merge the peer's view of the cluster state
 * into our own, possibly running the sync handshake to decide on a
 * resync direction.  The state is sampled and re-applied under
 * tconn->req_lock, retrying if it changed concurrently.
 * Returns true on success, false on a fatal condition (dry-run abort,
 * handshake failure, unthawable IO, or a rejected state change). */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	/* while the peer is still attaching, derive its effective disk
	 * state from the consistency bit in its UUID flags */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* sample our state; re-checked against mdev->state before commit */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* the peer being Ahead makes us the Behind side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from the handshake means "no resolution found" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	/* our state changed while we were deciding: start over */
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3501
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003502static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3503 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003505 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003506
3507 wait_event(mdev->misc_wait,
3508 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003509 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510 mdev->state.conn < C_CONNECTED ||
3511 mdev->state.disk < D_NEGOTIATING);
3512
3513 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3514
Philipp Reisnerb411b362009-09-25 16:07:19 -07003515 /* Here the _drbd_uuid_ functions are right, current should
3516 _not_ be rotated into the history */
3517 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3518 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3519 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3520
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003521 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522 drbd_start_resync(mdev, C_SYNC_TARGET);
3523
3524 put_ldev(mdev);
3525 } else
3526 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3527
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003528 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529}
3530
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003531/**
3532 * receive_bitmap_plain
3533 *
3534 * Return 0 when done, 1 when another iteration is needed, and a negative error
3535 * code upon failure.
3536 */
3537static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003538receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3539 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003540{
3541 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3542 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003543 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003544
Philipp Reisner02918be2010-08-20 14:35:10 +02003545 if (want != data_size) {
3546 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003547 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548 }
3549 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003550 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003551 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003552 if (err != want) {
3553 if (err >= 0)
3554 err = -EIO;
3555 return err;
3556 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003557
3558 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3559
3560 c->word_offset += num_words;
3561 c->bit_offset = c->word_offset * BITS_PER_LONG;
3562 if (c->bit_offset > c->bm_bits)
3563 c->bit_offset = c->bm_bits;
3564
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003565 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003566}
3567
/**
 * recv_bm_rle_bits
 *
 * Decode one run-length + VLI encoded compressed-bitmap payload and set
 * the corresponding bits in the local bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* decode window: next (up to) 64 bits of the input stream */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* bitmap position of the current run */
	unsigned long e;			/* end bit of a run of set bits */
	int toggle = DCBP_get_start(p);	/* non-zero: current run is a run of set bits */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* runs alternate between clear and set bits; only the set runs
	 * need to touch the bitmap */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		if (have < bits) {
			/* the code word claimed more bits than are left in
			 * the look-ahead window: corrupt stream */
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded code word, then refill the window */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 0 once we decoded exactly up to the last bitmap bit, 1 otherwise */
	return (s != c->bm_bits);
}
3632
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003633/**
3634 * decode_bitmap_c
3635 *
3636 * Return 0 when done, 1 when another iteration is needed, and a negative error
3637 * code upon failure.
3638 */
3639static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640decode_bitmap_c(struct drbd_conf *mdev,
3641 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003642 struct bm_xfer_ctx *c,
3643 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644{
3645 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003646 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647
3648 /* other variants had been implemented for evaluation,
3649 * but have been dropped as this one turned out to be "best"
3650 * during all our tests. */
3651
3652 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3653 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003654 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003655}
3656
3657void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3658 const char *direction, struct bm_xfer_ctx *c)
3659{
3660 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003661 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003662 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3663 + c->bm_words * sizeof(long);
3664 unsigned total = c->bytes[0] + c->bytes[1];
3665 unsigned r;
3666
3667 /* total can not be zero. but just in case: */
3668 if (total == 0)
3669 return;
3670
3671 /* don't report if not compressed */
3672 if (total >= plain)
3673 return;
3674
3675 /* total < plain. check for overflow, still */
3676 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3677 : (1000 * total / plain);
3678
3679 if (r > 1000)
3680 r = 1000;
3681
3682 r = 1000 - r;
3683 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3684 "total %u; compression: %u.%u%%\n",
3685 direction,
3686 c->bytes[1], c->packets[1],
3687 c->bytes[0], c->packets[0],
3688 total, r/10, r % 10);
3689}
3690
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit or 64 bit chunks,
   as long as it is little endian. (Understand it as a byte stream,
   beginning with the lowest byte...) If we used big endian,
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bit issue.

   returns 0 on failure, 1 if we successfully received it. */
/* Receive the peer's complete bitmap: loop over P_BITMAP /
 * P_COMPRESSED_BITMAP packets until the full bitmap has arrived, then
 * kick off the appropriate side of the resync.
 * Returns true on success, false on any error (protocol or allocation). */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* each iteration consumes one bitmap packet; the decoders return
	 * 0 when the bitmap is complete, 1 to continue, <0 on error */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			/* reconstruct the full packet (header + payload) in the
			 * scratch page, as the decoder expects */
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* transfer statistics, per encoding (index 1 = plain, 0 = RLE) */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		/* more to come: read the next packet header ourselves */
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* sync target: reply with our bitmap, then move on to the
		 * sync-uuid exchange */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);
	return ok;
}
3793
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003794static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3795 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003796{
3797 /* TODO zero copy sink :) */
3798 static char sink[128];
3799 int size, want, r;
3800
Philipp Reisner02918be2010-08-20 14:35:10 +02003801 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3802 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003803
Philipp Reisner02918be2010-08-20 14:35:10 +02003804 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805 while (size > 0) {
3806 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003807 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003808 if (!expect(r > 0))
3809 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810 size -= r;
3811 }
3812 return size == 0;
3813}
3814
/* P_UNPLUG_REMOTE carries no payload; cmd and data_size are unused. */
static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
				unsigned int data_size)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(mdev->tconn->data.socket);

	return true;
}
3824
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003825static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3826 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003827{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003828 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003829
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003830 switch (mdev->state.conn) {
3831 case C_WF_SYNC_UUID:
3832 case C_WF_BITMAP_T:
3833 case C_BEHIND:
3834 break;
3835 default:
3836 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3837 drbd_conn_str(mdev->state.conn));
3838 }
3839
Philipp Reisner73a01a12010-10-27 14:33:00 +02003840 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3841
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003842 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003843}
3844
/* Packet handler: gets the device, the decoded command, and the number of
 * payload bytes that remain to be received from the socket. */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);

/* One dispatch-table entry per packet type (see drbd_cmd_handler[]). */
struct data_cmd {
	int expect_payload;	/* packet may carry payload beyond pkt_size */
	size_t pkt_size;	/* fixed on-wire size, incl. generic header */
	drbd_cmd_handler_f function;
};
3853
/* Dispatch table for the main receive loop, indexed by packet type.
 * Packet types without an entry have a NULL .function and are rejected
 * by the receive loop as unknown. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]	    = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]	    = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
};
3879
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it may not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003887static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003888{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003889 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003890 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003891 size_t shs; /* sub header size */
3892 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003894 while (get_t_state(&tconn->receiver) == RUNNING) {
3895 drbd_thread_current_set_cpu(&tconn->receiver);
3896 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003897 goto err_out;
3898
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003899 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
3900 !drbd_cmd_handler[pi.cmd].function)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003901 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003902 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003903 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904
Philipp Reisner77351055b2011-02-07 17:24:26 +01003905 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3906 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003907 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003908 goto err_out;
3909 }
3910
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003911 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003912 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003913 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003914 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003915 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003916 goto err_out;
3917 }
3918 }
3919
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003920 rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003921
3922 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003923 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003924 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003925 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928
Philipp Reisner02918be2010-08-20 14:35:10 +02003929 if (0) {
3930 err_out:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003931 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003933}
3934
Philipp Reisnera21e9292011-02-08 15:08:49 +01003935void drbd_flush_workqueue(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936{
3937 struct drbd_wq_barrier barr;
3938
3939 barr.w.cb = w_prev_work_done;
Philipp Reisnera21e9292011-02-08 15:08:49 +01003940 barr.w.mdev = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003941 init_completion(&barr.done);
Philipp Reisnera21e9292011-02-08 15:08:49 +01003942 drbd_queue_work(&mdev->tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003943 wait_for_completion(&barr.done);
3944}
3945
Philipp Reisner360cc742011-02-08 14:29:53 +01003946static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003947{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003948 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003949 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003950
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003951 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003952 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003953
3954 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01003955 drbd_thread_stop(&tconn->asender);
3956 drbd_free_sock(tconn);
3957
3958 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
3959
3960 conn_info(tconn, "Connection closed\n");
3961
3962 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003963 oc = tconn->cstate;
3964 if (oc >= C_UNCONNECTED)
3965 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
3966
Philipp Reisner360cc742011-02-08 14:29:53 +01003967 spin_unlock_irq(&tconn->req_lock);
3968
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003969 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01003970 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
3971
3972 crypto_free_hash(tconn->cram_hmac_tfm);
3973 tconn->cram_hmac_tfm = NULL;
3974
3975 kfree(tconn->net_conf);
3976 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003977 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01003978 }
3979}
3980
/* Per-volume cleanup after the connection went down (idr_for_each
 * callback, hence the (vnr, p, data) signature; always returns 0).
 * Quiesces in-flight requests, cancels resync state, and releases
 * network-related resources for one device. */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 * * On C_SYNC_TARGET we do not have any data structures describing
	 * the pending RSDataRequest's we have sent.
	 * * On C_SYNC_SOURCE there is no data structure that tracks
	 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 * And no, it is not the sum of the reference counts in the
	 * resync_LRU. The resync_LRU tracks the whole operation including
	 * the disk-IO, while the rs_pending_cnt only tracks the blocks
	 * on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	del_timer(&mdev->request_timer);

	/* make sure syncer is stopped and w_resume_next_sg queued */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee(). If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	/* forget the peer's UUIDs; they are re-sent on reconnect */
	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	if (!is_susp(mdev->state))
		tl_clear(mdev);

	drbd_md_sync(mdev);

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* as Primary with a fencing policy and an uncertain peer disk,
	 * try to outdate the peer (asynchronously) */
	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
4074
4075/*
4076 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4077 * we can agree on is stored in agreed_pro_version.
4078 *
4079 * feature flags and the reserved array should be enough room for future
4080 * enhancements of the handshake protocol, and possible plugins...
4081 *
4082 * for now, they are expected to be zero, but ignored.
4083 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004084static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004086 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004087 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004088 int ok;
4089
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004090 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4091 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 return 0; /* interrupted. not ok. */
4093 }
4094
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004095 if (tconn->data.socket == NULL) {
4096 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097 return 0;
4098 }
4099
4100 memset(p, 0, sizeof(*p));
4101 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4102 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004103 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
4104 &p->head, sizeof(*p), 0);
4105 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004106 return ok;
4107}
4108
4109/*
4110 * return values:
4111 * 1 yes, we have a valid connection
4112 * 0 oops, did not work out, please try again
4113 * -1 peer talks different language,
4114 * no point in trying again, please go standalone.
4115 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* payload size: the handshake packet minus its header */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int rv;

	/* send our own handshake first ... */
	rv = drbd_send_handshake(tconn);
	if (!rv)
		return 0;

	/* ... then expect the peer's handshake header */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		return 0;

	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	/* receive the handshake payload into our receive buffer */
	rv = drbd_recv(tconn, &p->head.payload, expect);

	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	/* convert the announced protocol version range to host byte order */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* a peer that announces no maximum speaks exactly protocol_min */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* the two version ranges must overlap, or we cannot talk at all */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version supported by both sides */
	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4175
4176#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	/* Stub used when the kernel lacks HMAC support: authentication can
	 * never succeed, so refuse permanently (-1 = don't retry).
	 * Note: this takes only a tconn; the previous dev_err(DEV, ...) form
	 * relied on an mdev that is not in scope here, so log per-connection
	 * like the real drbd_do_auth() below does. */
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4183#else
4184#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004185
4186/* Return value:
4187 1 - auth succeeded,
4188 0 - failed, try again (network error),
4189 -1 - auth failed, don't try again.
4190*/
4191
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	/* Symmetric challenge/response authentication over the data socket,
	 * using an HMAC keyed with the configured shared secret. */
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;		/* our HMAC over the peer's challenge */
	char *right_response = NULL;	/* expected HMAC over our challenge */
	char *peers_ch = NULL;		/* challenge received from the peer */
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	/* key the HMAC transform with the shared secret */
	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* send our challenge ... */
	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* ... and expect the peer's challenge in return */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* sanity-limit the announced size before allocating for it */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	/* HMAC the peer's challenge and send the result back */
	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	/* now receive the peer's response to our challenge */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer */
	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	/* compute what the peer's answer to our challenge must look like */
	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* rv == 1 iff the peer proved knowledge of the shared secret */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4334#endif
4335
4336int drbdd_init(struct drbd_thread *thi)
4337{
Philipp Reisner392c8802011-02-09 10:33:31 +01004338 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339 int h;
4340
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004341 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342
4343 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004344 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004346 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004347 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 }
4349 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004350 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004351 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352 }
4353 } while (h == 0);
4354
4355 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004356 if (get_net_conf(tconn)) {
4357 drbdd(tconn);
4358 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359 }
4360 }
4361
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004362 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004363
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004364 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365 return 0;
4366}
4367
4368/* ********* acknowledge sender ******** */
4369
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004370static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004372 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004373 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374
4375 int retcode = be32_to_cpu(p->retcode);
4376
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004377 if (cmd == P_STATE_CHG_REPLY) {
4378 if (retcode >= SS_SUCCESS) {
4379 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4380 } else {
4381 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4382 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4383 drbd_set_st_err_str(retcode), retcode);
4384 }
4385 wake_up(&mdev->state_wait);
4386 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4387 if (retcode >= SS_SUCCESS) {
4388 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4389 } else {
4390 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4391 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4392 drbd_set_st_err_str(retcode), retcode);
4393 }
4394 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004395 }
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004396 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397}
4398
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004399static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004400{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004401 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402
4403}
4404
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004405static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004407 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004408 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004409 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4410 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4411 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004413 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004414}
4415
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004416static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004417{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004418 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 sector_t sector = be64_to_cpu(p->sector);
4420 int blksize = be32_to_cpu(p->blksize);
4421
Philipp Reisner31890f42011-01-19 14:12:51 +01004422 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423
4424 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4425
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004426 if (get_ldev(mdev)) {
4427 drbd_rs_complete_io(mdev, sector);
4428 drbd_set_in_sync(mdev, sector, blksize);
4429 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4430 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4431 put_ldev(mdev);
4432 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004434 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004436 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004437}
4438
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004439static int
4440validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4441 struct rb_root *root, const char *func,
4442 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443{
4444 struct drbd_request *req;
4445 struct bio_and_error m;
4446
Philipp Reisner87eeee42011-01-19 14:16:30 +01004447 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004448 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004450 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004451 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452 }
4453 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004454 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455
4456 if (m.bio)
4457 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004458 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004459}
4460
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	/* Map the various positive write acknowledgements onto request state
	 * machine events; resync writes (ID_SYNCER) are accounted directly. */
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* ack for a resync write: no request object exists for it */
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	/* which ack arrives depends on the wire protocol in use */
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = DISCARD_WRITE;
		break;
	case P_RETRY_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = POSTPONE_WRITE;
		break;
	default:
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4505
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004506static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004508 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004509 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004510 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004511 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4512 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004513 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004514
4515 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4516
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004517 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518 dec_rs_pending(mdev);
4519 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004520 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004522
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004523 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004524 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004525 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004526 if (!found) {
4527 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4528 The master bio might already be completed, therefore the
4529 request is no longer in the collision hash. */
4530 /* In Protocol B we might already have got a P_RECV_ACK
4531 but then get a P_NEG_ACK afterwards. */
4532 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004533 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004534 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004535 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004536 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537}
4538
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004539static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004541 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542 sector_t sector = be64_to_cpu(p->sector);
4543
4544 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004545
Philipp Reisnerb411b362009-09-25 16:07:19 -07004546 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4547 (unsigned long long)sector, be32_to_cpu(p->blksize));
4548
4549 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004550 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004551 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552}
4553
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	/* The peer denied a resync data request (P_NEG_RS_DREPLY) or
	 * cancelled it (P_RS_CANCEL). */
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: a cancel needs no extra accounting */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4584
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	/* the peer has processed this epoch; release the corresponding
	 * part of the transfer log */
	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));

	/* If we are "ahead" and no application writes are in flight, arm the
	 * timer that turns us into a resync source.  Evaluation order
	 * matters: test_and_set_bit() has a side effect, so it is the last
	 * operand and only runs when the cheaper conditions already hold. */
	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4600
static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	/* The peer reports the result of verifying one block during
	 * online verify. */
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	struct drbd_work *w;
	sector_t sector;
	int size;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* the magic block_id flags an out-of-sync block */
	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_oos_found(mdev, sector, size);
	else
		ov_oos_print(mdev);

	/* without access to the local disk there is nothing more to do */
	if (!get_ldev(mdev))
		return true;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		/* verify done: hand the finish work to the worker thread,
		 * or do it directly if the small allocation fails */
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work_front(&mdev->tconn->data.work, w);
		} else {
			dev_err(DEV, "kmalloc(w) failed.");
			ov_oos_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return true;
}
4645
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	/* Deliberately ignore this packet (used e.g. for P_DELAY_PROBE,
	 * see the asender dispatch table). */
	return true;
}
4650
/* One entry of the asender dispatch table: how large the packet for this
 * command is, and which handler processes it. */
struct asender_cmd {
	size_t pkt_size;	/* sizeof() of the packet structure expected */
	int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);	/* returns true on success */
};
4655
4656static struct asender_cmd *get_asender_cmd(int cmd)
4657{
4658 static struct asender_cmd asender_tbl[] = {
4659 /* anything missing from this table is in
4660 * the drbd_cmd_handler (drbd_default_handler) table,
4661 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004662 [P_PING] = { sizeof(struct p_header), got_Ping },
4663 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4665 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4666 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004667 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004668 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4669 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4670 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4671 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4672 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4673 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4674 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004675 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004676 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004677 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004678 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679 };
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01004680
4681 if (cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[cmd].process)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004682 return NULL;
4683 return &asender_tbl[cmd];
4684}
4685
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	/* idr_for_each() callback: drain the done_ee list of one volume.
	 * A nonzero return value stops the iteration in the caller. */
	struct drbd_conf *mdev = p;	/* no cast needed from void * in C */

	return !drbd_process_done_ee(mdev);
}
4691
4692static int _check_ee_empty(int vnr, void *p, void *data)
4693{
4694 struct drbd_conf *mdev = (struct drbd_conf *)p;
4695 struct drbd_tconn *tconn = mdev->tconn;
4696 int not_empty;
4697
4698 spin_lock_irq(&tconn->req_lock);
4699 not_empty = !list_empty(&mdev->done_ee);
4700 spin_unlock_irq(&tconn->req_lock);
4701
4702 return not_empty;
4703}
4704
4705static int tconn_process_done_ee(struct drbd_tconn *tconn)
4706{
4707 int not_empty, err;
4708
4709 do {
4710 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4711 flush_signals(current);
4712 err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
4713 if (err)
4714 return err;
4715 set_bit(SIGNAL_ASENDER, &tconn->flags);
4716 not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
4717 } while (not_empty);
4718
4719 return 0;
4720}
4721
Philipp Reisnerb411b362009-09-25 16:07:19 -07004722int drbd_asender(struct drbd_thread *thi)
4723{
Philipp Reisner392c8802011-02-09 10:33:31 +01004724 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004725 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004726 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004727 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004728 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729 void *buf = h;
4730 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004731 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004732 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004733
Philipp Reisnerb411b362009-09-25 16:07:19 -07004734 current->policy = SCHED_RR; /* Make this a realtime task! */
4735 current->rt_priority = 2; /* more important than all other tasks */
4736
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004737 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004738 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004739 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004740 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004741 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004742 goto reconnect;
4743 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004744 tconn->meta.socket->sk->sk_rcvtimeo =
4745 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004746 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004747 }
4748
Philipp Reisner32862ec2011-02-08 16:41:01 +01004749 /* TODO: conditionally cork; it may hurt latency if we cork without
4750 much to send */
4751 if (!tconn->net_conf->no_cork)
4752 drbd_tcp_cork(tconn->meta.socket);
4753 if (tconn_process_done_ee(tconn))
4754 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004755 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004756 if (!tconn->net_conf->no_cork)
4757 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758
4759 /* short circuit, recv_msg would return EINTR anyways. */
4760 if (signal_pending(current))
4761 continue;
4762
Philipp Reisner32862ec2011-02-08 16:41:01 +01004763 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4764 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004765
4766 flush_signals(current);
4767
4768 /* Note:
4769 * -EINTR (on meta) we got a signal
4770 * -EAGAIN (on meta) rcvtimeo expired
4771 * -ECONNRESET other side closed the connection
4772 * -ERESTARTSYS (on data) we got a signal
4773 * rv < 0 other than above: unexpected error!
4774 * rv == expected: full header or command
4775 * rv < expected: "woken" by signal during receive
4776 * rv == 0 : "connection shut down by peer"
4777 */
4778 if (likely(rv > 0)) {
4779 received += rv;
4780 buf += rv;
4781 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004782 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004783 goto reconnect;
4784 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004785 /* If the data socket received something meanwhile,
4786 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004787 if (time_after(tconn->last_received,
4788 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004789 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004790 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004791 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004792 goto reconnect;
4793 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004794 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004795 continue;
4796 } else if (rv == -EINTR) {
4797 continue;
4798 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004799 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004800 goto reconnect;
4801 }
4802
4803 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004804 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004805 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004806 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004807 if (unlikely(cmd == NULL)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004808 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004809 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004810 goto disconnect;
4811 }
4812 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004813 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004814 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004815 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004816 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004817 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004818 }
4819 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004820 tconn->last_received = jiffies;
4821 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004822 goto reconnect;
4823
Lars Ellenbergf36af182011-03-09 22:44:55 +01004824 /* the idle_timeout (ping-int)
4825 * has been restored in got_PingAck() */
4826 if (cmd == get_asender_cmd(P_PING_ACK))
4827 ping_timeout_active = 0;
4828
Philipp Reisnerb411b362009-09-25 16:07:19 -07004829 buf = h;
4830 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004831 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004832 cmd = NULL;
4833 }
4834 }
4835
4836 if (0) {
4837reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004838 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839 }
4840 if (0) {
4841disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004842 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004843 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004844 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004845
Philipp Reisner32862ec2011-02-08 16:41:01 +01004846 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847
4848 return 0;
4849}