blob: 58727e987110a2d1d8c3e292e6d27ca2ac74b5ef [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/* Result type of drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
/* Prototypes of file-local functions that are referenced in this file. */
static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *,
					       struct drbd_epoch *,
					       enum epoch_event);
static int e_end_block(struct drbd_work *, int);


/* gfp flags used for the opportunistic alloc_page() calls in this file. */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020091
92 if (!page)
93 return NULL;
94
Lars Ellenberg45bb9122010-05-14 17:10:48 +020095 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
152static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700153{
154 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200155 struct page *tmp = NULL;
156 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700157
158 /* Yes, testing drbd_pp_vacant outside the lock is racy.
159 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700161 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200162 page = page_chain_del(&drbd_pp_pool, number);
163 if (page)
164 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 if (page)
167 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700168 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200169
Philipp Reisnerb411b362009-09-25 16:07:19 -0700170 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
171 * "criss-cross" setup, that might cause write-out on some other DRBD,
172 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200173 for (i = 0; i < number; i++) {
174 tmp = alloc_page(GFP_TRY);
175 if (!tmp)
176 break;
177 set_page_private(tmp, (unsigned long)page);
178 page = tmp;
179 }
180
181 if (i == number)
182 return page;
183
184 /* Not enough pages immediately available this time.
185 * No need to jump around here, drbd_pp_alloc will retry this
186 * function "soon". */
187 if (page) {
188 tmp = page_chain_tail(page, NULL);
189 spin_lock(&drbd_pp_lock);
190 page_chain_add(&drbd_pp_pool, page, tmp);
191 drbd_pp_vacant += i;
192 spin_unlock(&drbd_pp_lock);
193 }
194 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700195}
196
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
198{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100199 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200 struct list_head *le, *tle;
201
202 /* The EEs are always appended to the end of the list. Since
203 they are sent in order over the wire, they have to finish
204 in order. As soon as we see the first not finished we can
205 stop to examine the list... */
206
207 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100208 peer_req = list_entry(le, struct drbd_peer_request, w.list);
209 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700210 break;
211 list_move(le, to_be_freed);
212 }
213}
214
215static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
216{
217 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100218 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219
Philipp Reisner87eeee42011-01-19 14:16:30 +0100220 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100222 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700223
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100224 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
225 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226}
227
228/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200229 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200231 * @number: number of pages requested
232 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700233 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * Tries to allocate number pages, first from our own page pool, then from
235 * the kernel, unless this allocation would exceed the max_buffers setting.
236 * Possibly retry until DRBD frees sufficient pages somewhere else.
237 *
238 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700239 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200240static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700241{
242 struct page *page = NULL;
243 DEFINE_WAIT(wait);
244
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245 /* Yes, we may run up to @number over max_buffers. If we
246 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100247 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200248 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700251 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
252
253 drbd_kick_lo_and_reclaim_net(mdev);
254
Philipp Reisner89e58e72011-01-19 13:12:45 +0100255 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200256 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 if (page)
258 break;
259 }
260
261 if (!retry)
262 break;
263
264 if (signal_pending(current)) {
265 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
266 break;
267 }
268
269 schedule();
270 }
271 finish_wait(&drbd_pp_wait, &wait);
272
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200273 if (page)
274 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700275 return page;
276}
277
278/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100279 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200280 * Either links the page chain back to the global pool,
281 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200282static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700283{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200284 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700285 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200286
Philipp Reisner81a5d602011-02-22 19:53:16 -0500287 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200288 i = page_chain_free(page);
289 else {
290 struct page *tmp;
291 tmp = page_chain_tail(page, &i);
292 spin_lock(&drbd_pp_lock);
293 page_chain_add(&drbd_pp_pool, page, tmp);
294 drbd_pp_vacant += i;
295 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700296 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200297 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200298 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200299 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
300 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 wake_up(&drbd_pp_wait);
302}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100319struct drbd_peer_request *
320drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
321 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700322{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100323 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700324 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200325 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700326
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100327 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700328 return NULL;
329
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100330 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
331 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700332 if (!(gfp_mask & __GFP_NOWARN))
333 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
334 return NULL;
335 }
336
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200337 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
338 if (!page)
339 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700340
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100341 drbd_clear_interval(&peer_req->i);
342 peer_req->i.size = data_size;
343 peer_req->i.sector = sector;
344 peer_req->i.local = false;
345 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100346
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100347 peer_req->epoch = NULL;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100348 peer_req->w.mdev = mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100349 peer_req->pages = page;
350 atomic_set(&peer_req->pending_bios, 0);
351 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100352 /*
353 * The block_id is opaque to the receiver. It is not endianness
354 * converted, and sent back to the sender unchanged.
355 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100356 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700357
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100358 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700359
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200360 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100361 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362 return NULL;
363}
364
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100365void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100366 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700367{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100368 if (peer_req->flags & EE_HAS_DIGEST)
369 kfree(peer_req->digest);
370 drbd_pp_free(mdev, peer_req->pages, is_net);
371 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
372 D_ASSERT(drbd_interval_empty(&peer_req->i));
373 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700374}
375
376int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
377{
378 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100379 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700380 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200381 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700382
Philipp Reisner87eeee42011-01-19 14:16:30 +0100383 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700384 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100385 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700386
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100387 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
388 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 count++;
390 }
391 return count;
392}
393
394
Philipp Reisner32862ec2011-02-08 16:41:01 +0100395/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700396 * and receive_Barrier.
397 *
398 * Move entries from net_ee to done_ee, if ready.
399 * Grab done_ee, call all callbacks, free the entries.
400 * The callbacks typically send out ACKs.
401 */
402static int drbd_process_done_ee(struct drbd_conf *mdev)
403{
404 LIST_HEAD(work_list);
405 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100406 struct drbd_peer_request *peer_req, *t;
Philipp Reisner082a3432011-03-15 16:05:42 +0100407 int ok = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408
Philipp Reisner87eeee42011-01-19 14:16:30 +0100409 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410 reclaim_net_ee(mdev, &reclaimed);
411 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100412 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100414 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
415 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700416
417 /* possible callbacks here:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100418 * e_end_block, and e_end_resync_block, e_send_discard_write.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700419 * all ignore the last argument.
420 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100421 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700422 /* list_del not necessary, next/prev members not touched */
Philipp Reisner00d56942011-02-09 18:09:48 +0100423 ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100424 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700425 }
426 wake_up(&mdev->ee_wait);
427
428 return ok;
429}
430
431void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
432{
433 DEFINE_WAIT(wait);
434
435 /* avoids spin_lock/unlock
436 * and calling prepare_to_wait in the fast path */
437 while (!list_empty(head)) {
438 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100439 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100440 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700441 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100442 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700443 }
444}
445
446void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
447{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100448 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700449 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100450 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700451}
452
453/* see also kernel_accept; which is only present since 2.6.18.
454 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100455static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456{
457 struct sock *sk = sock->sk;
458 int err = 0;
459
460 *what = "listen";
461 err = sock->ops->listen(sock, 5);
462 if (err < 0)
463 goto out;
464
465 *what = "sock_create_lite";
466 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
467 newsock);
468 if (err < 0)
469 goto out;
470
471 *what = "accept";
472 err = sock->ops->accept(sock, *newsock, 0);
473 if (err < 0) {
474 sock_release(*newsock);
475 *newsock = NULL;
476 goto out;
477 }
478 (*newsock)->ops = sock->ops;
479
480out:
481 return err;
482}
483
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100484static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700485{
486 mm_segment_t oldfs;
487 struct kvec iov = {
488 .iov_base = buf,
489 .iov_len = size,
490 };
491 struct msghdr msg = {
492 .msg_iovlen = 1,
493 .msg_iov = (struct iovec *)&iov,
494 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
495 };
496 int rv;
497
498 oldfs = get_fs();
499 set_fs(KERNEL_DS);
500 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
501 set_fs(oldfs);
502
503 return rv;
504}
505
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100506static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700507{
508 mm_segment_t oldfs;
509 struct kvec iov = {
510 .iov_base = buf,
511 .iov_len = size,
512 };
513 struct msghdr msg = {
514 .msg_iovlen = 1,
515 .msg_iov = (struct iovec *)&iov,
516 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
517 };
518 int rv;
519
520 oldfs = get_fs();
521 set_fs(KERNEL_DS);
522
523 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100524 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700525 if (rv == size)
526 break;
527
528 /* Note:
529 * ECONNRESET other side closed the connection
530 * ERESTARTSYS (on sock) we got a signal
531 */
532
533 if (rv < 0) {
534 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100535 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700536 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100537 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700538 break;
539 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100540 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700541 break;
542 } else {
543 /* signal came in, or peer/link went down,
544 * after we read a partial message
545 */
546 /* D_ASSERT(signal_pending(current)); */
547 break;
548 }
549 };
550
551 set_fs(oldfs);
552
553 if (rv != size)
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100554 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700555
556 return rv;
557}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/* Make one attempt to actively connect to the peer.
 *
 * Creates a TCP socket, binds it to the configured local address (port 0)
 * and connects to the configured peer address.
 *
 * Returns the connected socket, or NULL on failure.  Failures before the
 * connect() step that are not transient request C_DISCONNECTING; a failing
 * connect() never does, since the peer may simply not be there yet.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int my_addr_len;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	/* Never copy, nor later announce to bind(), more bytes than
	 * our on-stack address buffer can hold. */
	my_addr_len = min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, tconn->net_conf->my_addr, my_addr_len);
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			break;
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
Philipp Reisner76536202011-02-07 14:09:54 +0100656static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700657{
658 int timeo, err;
659 struct socket *s_estab = NULL, *s_listen;
660 const char *what;
661
Philipp Reisner76536202011-02-07 14:09:54 +0100662 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700663 return NULL;
664
665 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100666 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700667 SOCK_STREAM, IPPROTO_TCP, &s_listen);
668 if (err) {
669 s_listen = NULL;
670 goto out;
671 }
672
Philipp Reisner76536202011-02-07 14:09:54 +0100673 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700674 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
675
676 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
677 s_listen->sk->sk_rcvtimeo = timeo;
678 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100679 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
680 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700681
682 what = "bind before listen";
683 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100684 (struct sockaddr *) tconn->net_conf->my_addr,
685 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700686 if (err < 0)
687 goto out;
688
Philipp Reisner76536202011-02-07 14:09:54 +0100689 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700690
691out:
692 if (s_listen)
693 sock_release(s_listen);
694 if (err < 0) {
695 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100696 conn_err(tconn, "%s failed, err = %d\n", what, err);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100697 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700698 }
699 }
Philipp Reisner76536202011-02-07 14:09:54 +0100700 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700701
702 return s_estab;
703}
704
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100705static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100707 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100709 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710}
711
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100712static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700713{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100714 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 int rr;
716
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100717 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700718
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100719 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 return be16_to_cpu(h->command);
721
722 return 0xffff;
723}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100747/* Gets called if a connection is established, or if a new minor gets created
748 in a connection */
749int drbd_connected(int vnr, void *p, void *data)
Philipp Reisner907599e2011-02-08 11:25:37 +0100750{
751 struct drbd_conf *mdev = (struct drbd_conf *)p;
752 int ok = 1;
753
754 atomic_set(&mdev->packet_seq, 0);
755 mdev->peer_seq = 0;
756
Philipp Reisner8410da82011-02-11 20:11:10 +0100757 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
758 &mdev->tconn->cstate_mutex :
759 &mdev->own_state_mutex;
760
Philipp Reisner907599e2011-02-08 11:25:37 +0100761 ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
762 ok &= drbd_send_sizes(mdev, 0, 0);
763 ok &= drbd_send_uuids(mdev);
764 ok &= drbd_send_state(mdev);
765 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
766 clear_bit(RESIZE_PENDING, &mdev->flags);
767
Philipp Reisner8410da82011-02-11 20:11:10 +0100768
Philipp Reisner907599e2011-02-08 11:25:37 +0100769 return !ok;
770}
771
Philipp Reisnerb411b362009-09-25 16:07:19 -0700772/*
773 * return values:
774 * 1 yes, we have a valid connection
775 * 0 oops, did not work out, please try again
776 * -1 peer talks different language,
777 * no point in trying again, please go standalone.
778 * -2 We do not have a network config...
779 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100780static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781{
782 struct socket *s, *sock, *msock;
783 int try, h, ok;
784
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100785 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700786 return -2;
787
Philipp Reisner907599e2011-02-08 11:25:37 +0100788 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
789 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100790 /* agreed_pro_version must be smaller than 100 so we send the old
791 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700792
793 sock = NULL;
794 msock = NULL;
795
796 do {
797 for (try = 0;;) {
798 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100799 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700800 if (s || ++try >= 3)
801 break;
802 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100803 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700804 }
805
806 if (s) {
807 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100808 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700809 sock = s;
810 s = NULL;
811 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100812 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813 msock = s;
814 s = NULL;
815 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100816 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700817 goto out_release_sockets;
818 }
819 }
820
821 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100822 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100823 ok = drbd_socket_okay(&sock);
824 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825 if (ok)
826 break;
827 }
828
829retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100830 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700831 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100832 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100833 drbd_socket_okay(&sock);
834 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700835 switch (try) {
836 case P_HAND_SHAKE_S:
837 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100838 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700839 sock_release(sock);
840 }
841 sock = s;
842 break;
843 case P_HAND_SHAKE_M:
844 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100845 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700846 sock_release(msock);
847 }
848 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100849 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700850 break;
851 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100852 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700853 sock_release(s);
854 if (random32() & 1)
855 goto retry;
856 }
857 }
858
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100859 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700860 goto out_release_sockets;
861 if (signal_pending(current)) {
862 flush_signals(current);
863 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100864 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700865 goto out_release_sockets;
866 }
867
868 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100869 ok = drbd_socket_okay(&sock);
870 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700871 if (ok)
872 break;
873 }
874 } while (1);
875
876 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
877 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
878
879 sock->sk->sk_allocation = GFP_NOIO;
880 msock->sk->sk_allocation = GFP_NOIO;
881
882 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
883 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
884
Philipp Reisnerb411b362009-09-25 16:07:19 -0700885 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100886 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700887 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
888 * first set it to the P_HAND_SHAKE timeout,
889 * which we set to 4x the configured ping_timeout. */
890 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100891 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892
Philipp Reisner907599e2011-02-08 11:25:37 +0100893 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
894 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700895
896 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300897 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700898 drbd_tcp_nodelay(sock);
899 drbd_tcp_nodelay(msock);
900
Philipp Reisner907599e2011-02-08 11:25:37 +0100901 tconn->data.socket = sock;
902 tconn->meta.socket = msock;
903 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700904
Philipp Reisner907599e2011-02-08 11:25:37 +0100905 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700906 if (h <= 0)
907 return h;
908
Philipp Reisner907599e2011-02-08 11:25:37 +0100909 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700910 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100911 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100912 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100913 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700914 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100915 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100916 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100917 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700918 }
919 }
920
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100921 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922 return 0;
923
Philipp Reisner907599e2011-02-08 11:25:37 +0100924 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700925 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
926
Philipp Reisner907599e2011-02-08 11:25:37 +0100927 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700928
Philipp Reisner907599e2011-02-08 11:25:37 +0100929 if (drbd_send_protocol(tconn) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200930 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700931
Philipp Reisner907599e2011-02-08 11:25:37 +0100932 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700933
934out_release_sockets:
935 if (sock)
936 sock_release(sock);
937 if (msock)
938 sock_release(msock);
939 return -1;
940}
941
Philipp Reisnerce243852011-02-07 17:27:47 +0100942static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100944 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100945 pi->cmd = be16_to_cpu(h->h80.command);
946 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100947 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100948 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100949 pi->cmd = be16_to_cpu(h->h95.command);
950 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
951 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200952 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100953 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200954 be32_to_cpu(h->h80.magic),
955 be16_to_cpu(h->h80.command),
956 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100957 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700958 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100959 return true;
960}
961
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100962static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100963{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100964 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100965 int r;
966
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100967 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100968 if (unlikely(r != sizeof(*h))) {
969 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100970 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100971 return false;
972 }
973
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100974 r = decode_header(tconn, h, pi);
975 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976
Philipp Reisner257d0af2011-01-26 12:15:29 +0100977 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700978}
979
Philipp Reisner2451fc32010-08-24 13:43:11 +0200980static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981{
982 int rv;
983
984 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400985 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200986 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 if (rv) {
988 dev_err(DEV, "local disk flush failed with status %d\n", rv);
989 /* would rather check on EOPNOTSUPP, but that is not reliable.
990 * don't try again for ANY return value != 0
991 * if (rv == -EOPNOTSUPP) */
992 drbd_bump_write_ordering(mdev, WO_drain_io);
993 }
994 put_ldev(mdev);
995 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996}
997
998/**
999 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1000 * @mdev: DRBD device.
1001 * @epoch: Epoch object.
1002 * @ev: Epoch event.
1003 */
1004static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1005 struct drbd_epoch *epoch,
1006 enum epoch_event ev)
1007{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001008 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010 enum finish_epoch rv = FE_STILL_LIVE;
1011
1012 spin_lock(&mdev->epoch_lock);
1013 do {
1014 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015
1016 epoch_size = atomic_read(&epoch->epoch_size);
1017
1018 switch (ev & ~EV_CLEANUP) {
1019 case EV_PUT:
1020 atomic_dec(&epoch->active);
1021 break;
1022 case EV_GOT_BARRIER_NR:
1023 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001024 break;
1025 case EV_BECAME_LAST:
1026 /* nothing to do*/
1027 break;
1028 }
1029
Philipp Reisnerb411b362009-09-25 16:07:19 -07001030 if (epoch_size != 0 &&
1031 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001032 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033 if (!(ev & EV_CLEANUP)) {
1034 spin_unlock(&mdev->epoch_lock);
1035 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1036 spin_lock(&mdev->epoch_lock);
1037 }
1038 dec_unacked(mdev);
1039
1040 if (mdev->current_epoch != epoch) {
1041 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1042 list_del(&epoch->list);
1043 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1044 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001045 kfree(epoch);
1046
1047 if (rv == FE_STILL_LIVE)
1048 rv = FE_DESTROYED;
1049 } else {
1050 epoch->flags = 0;
1051 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001052 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001053 if (rv == FE_STILL_LIVE)
1054 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001055 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001056 }
1057 }
1058
1059 if (!next_epoch)
1060 break;
1061
1062 epoch = next_epoch;
1063 } while (1);
1064
1065 spin_unlock(&mdev->epoch_lock);
1066
Philipp Reisnerb411b362009-09-25 16:07:19 -07001067 return rv;
1068}
1069
1070/**
1071 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1072 * @mdev: DRBD device.
1073 * @wo: Write ordering method to try.
1074 */
1075void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1076{
1077 enum write_ordering_e pwo;
1078 static char *write_ordering_str[] = {
1079 [WO_none] = "none",
1080 [WO_drain_io] = "drain",
1081 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082 };
1083
1084 pwo = mdev->write_ordering;
1085 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1087 wo = WO_drain_io;
1088 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1089 wo = WO_none;
1090 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001091 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1093}
1094
1095/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001096 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001097 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001098 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001099 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001100 *
1101 * May spread the pages to multiple bios,
1102 * depending on bio_add_page restrictions.
1103 *
1104 * Returns 0 if all bios have been submitted,
1105 * -ENOMEM if we could not allocate enough bios,
1106 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1107 * single page to an empty bio (which should never happen and likely indicates
1108 * that the lower level IO stack is in some way broken). This has been observed
1109 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001110 */
1111/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001112int drbd_submit_peer_request(struct drbd_conf *mdev,
1113 struct drbd_peer_request *peer_req,
1114 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001115{
1116 struct bio *bios = NULL;
1117 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001118 struct page *page = peer_req->pages;
1119 sector_t sector = peer_req->i.sector;
1120 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001121 unsigned n_bios = 0;
1122 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001123 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001124
1125 /* In most cases, we will only need one bio. But in case the lower
1126 * level restrictions happen to be different at this offset on this
1127 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001128 * request in more than one bio.
1129 *
1130 * Plain bio_alloc is good enough here, this is no DRBD internally
1131 * generated bio, but a bio allocated on behalf of the peer.
1132 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001133next_bio:
1134 bio = bio_alloc(GFP_NOIO, nr_pages);
1135 if (!bio) {
1136 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1137 goto fail;
1138 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001139 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001140 bio->bi_sector = sector;
1141 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001142 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001143 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001144 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001145
1146 bio->bi_next = bios;
1147 bios = bio;
1148 ++n_bios;
1149
1150 page_chain_for_each(page) {
1151 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1152 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001153 /* A single page must always be possible!
1154 * But in case it fails anyways,
1155 * we deal with it, and complain (below). */
1156 if (bio->bi_vcnt == 0) {
1157 dev_err(DEV,
1158 "bio_add_page failed for len=%u, "
1159 "bi_vcnt=0 (bi_sector=%llu)\n",
1160 len, (unsigned long long)bio->bi_sector);
1161 err = -ENOSPC;
1162 goto fail;
1163 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001164 goto next_bio;
1165 }
1166 ds -= len;
1167 sector += len >> 9;
1168 --nr_pages;
1169 }
1170 D_ASSERT(page == NULL);
1171 D_ASSERT(ds == 0);
1172
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001173 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001174 do {
1175 bio = bios;
1176 bios = bios->bi_next;
1177 bio->bi_next = NULL;
1178
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001179 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001180 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001181 return 0;
1182
1183fail:
1184 while (bios) {
1185 bio = bios;
1186 bios = bios->bi_next;
1187 bio_put(bio);
1188 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001189 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001190}
1191
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001192static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001193 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001194{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001195 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001196
1197 drbd_remove_interval(&mdev->write_requests, i);
1198 drbd_clear_interval(i);
1199
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001200 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001201 if (i->waiting)
1202 wake_up(&mdev->misc_wait);
1203}
1204
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001205static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1206 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001207{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001208 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001209 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 struct drbd_epoch *epoch;
1211
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212 inc_unacked(mdev);
1213
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214 mdev->current_epoch->barrier_nr = p->barrier;
1215 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1216
1217 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1218 * the activity log, which means it would not be resynced in case the
1219 * R_PRIMARY crashes now.
1220 * Therefore we must send the barrier_ack after the barrier request was
1221 * completed. */
1222 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 case WO_none:
1224 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001225 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001226
1227 /* receiver context, in the writeout path of the other node.
1228 * avoid potential distributed deadlock */
1229 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1230 if (epoch)
1231 break;
1232 else
1233 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1234 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235
1236 case WO_bdev_flush:
1237 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001239 drbd_flush(mdev);
1240
1241 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1242 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1243 if (epoch)
1244 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245 }
1246
Philipp Reisner2451fc32010-08-24 13:43:11 +02001247 epoch = mdev->current_epoch;
1248 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1249
1250 D_ASSERT(atomic_read(&epoch->active) == 0);
1251 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001253 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001254 default:
1255 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001256 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 }
1258
1259 epoch->flags = 0;
1260 atomic_set(&epoch->epoch_size, 0);
1261 atomic_set(&epoch->active, 0);
1262
1263 spin_lock(&mdev->epoch_lock);
1264 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1265 list_add(&epoch->list, &mdev->current_epoch->list);
1266 mdev->current_epoch = epoch;
1267 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 } else {
1269 /* The current_epoch got recycled while we allocated this one... */
1270 kfree(epoch);
1271 }
1272 spin_unlock(&mdev->epoch_lock);
1273
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001274 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275}
1276
1277/* used from receive_RSDataReply (recv_resync_read)
1278 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001279static struct drbd_peer_request *
1280read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1281 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282{
Lars Ellenberg66660322010-04-06 12:15:04 +02001283 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001284 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001286 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001287 void *dig_in = mdev->tconn->int_dig_in;
1288 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001289 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290
Philipp Reisnera0638452011-01-19 14:31:32 +01001291 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1292 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001293
1294 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001295 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001297 if (!signal_pending(current))
1298 dev_warn(DEV,
1299 "short read receiving data digest: read %d expected %d\n",
1300 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001301 return NULL;
1302 }
1303 }
1304
1305 data_size -= dgs;
1306
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001307 if (!expect(data_size != 0))
1308 return NULL;
1309 if (!expect(IS_ALIGNED(data_size, 512)))
1310 return NULL;
1311 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1312 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313
Lars Ellenberg66660322010-04-06 12:15:04 +02001314 /* even though we trust out peer,
1315 * we sometimes have to double check. */
1316 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001317 dev_err(DEV, "request from peer beyond end of local disk: "
1318 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001319 (unsigned long long)capacity,
1320 (unsigned long long)sector, data_size);
1321 return NULL;
1322 }
1323
Philipp Reisnerb411b362009-09-25 16:07:19 -07001324 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1325 * "criss-cross" setup, that might cause write-out on some other DRBD,
1326 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001327 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1328 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001329 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001330
Philipp Reisnerb411b362009-09-25 16:07:19 -07001331 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001332 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001333 page_chain_for_each(page) {
1334 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001335 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001336 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001337 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001338 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1339 data[0] = data[0] ^ (unsigned long)-1;
1340 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001341 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001342 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001343 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001344 if (!signal_pending(current))
1345 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1346 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001347 return NULL;
1348 }
1349 ds -= rr;
1350 }
1351
1352 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001353 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001355 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1356 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001357 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001358 return NULL;
1359 }
1360 }
1361 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001362 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001363}
1364
1365/* drbd_drain_block() just takes a data block
1366 * out of the socket input buffer, and discards it.
1367 */
1368static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1369{
1370 struct page *page;
1371 int rr, rv = 1;
1372 void *data;
1373
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001374 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001375 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001376
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378
1379 data = kmap(page);
1380 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001381 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001382 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1383 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001384 if (!signal_pending(current))
1385 dev_warn(DEV,
1386 "short read receiving data: read %d expected %d\n",
1387 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001388 break;
1389 }
1390 data_size -= rr;
1391 }
1392 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001393 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001394 return rv;
1395}
1396
1397static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1398 sector_t sector, int data_size)
1399{
1400 struct bio_vec *bvec;
1401 struct bio *bio;
1402 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001403 void *dig_in = mdev->tconn->int_dig_in;
1404 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405
Philipp Reisnera0638452011-01-19 14:31:32 +01001406 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1407 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408
1409 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001410 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001411 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001412 if (!signal_pending(current))
1413 dev_warn(DEV,
1414 "short read receiving data reply digest: read %d expected %d\n",
1415 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416 return 0;
1417 }
1418 }
1419
1420 data_size -= dgs;
1421
1422 /* optimistically update recv_cnt. if receiving fails below,
1423 * we disconnect anyways, and counters will be reset. */
1424 mdev->recv_cnt += data_size>>9;
1425
1426 bio = req->master_bio;
1427 D_ASSERT(sector == bio->bi_sector);
1428
1429 bio_for_each_segment(bvec, bio, i) {
1430 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001431 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001432 kmap(bvec->bv_page)+bvec->bv_offset,
1433 expect);
1434 kunmap(bvec->bv_page);
1435 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001436 if (!signal_pending(current))
1437 dev_warn(DEV, "short read receiving data reply: "
1438 "read %d expected %d\n",
1439 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001440 return 0;
1441 }
1442 data_size -= rr;
1443 }
1444
1445 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001446 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447 if (memcmp(dig_in, dig_vv, dgs)) {
1448 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1449 return 0;
1450 }
1451 }
1452
1453 D_ASSERT(data_size == 0);
1454 return 1;
1455}
1456
1457/* e_end_resync_block() is called via
1458 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001459static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001461 struct drbd_peer_request *peer_req =
1462 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001463 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001464 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465 int ok;
1466
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001467 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001469 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1470 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1471 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 } else {
1473 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001474 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477 }
1478 dec_unacked(mdev);
1479
1480 return ok;
1481}
1482
1483static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1484{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001485 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001487 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1488 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001489 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490
1491 dec_rs_pending(mdev);
1492
Philipp Reisnerb411b362009-09-25 16:07:19 -07001493 inc_unacked(mdev);
1494 /* corresponding dec_unacked() in e_end_resync_block()
1495 * respective _drbd_clear_done_ee */
1496
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001497 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001498
Philipp Reisner87eeee42011-01-19 14:16:30 +01001499 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001500 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001501 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001503 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001504 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001505 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001507 /* don't care for the reason here */
1508 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001509 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001510 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001511 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001512
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001513 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001514fail:
1515 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001516 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517}
1518
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001519static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001520find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1521 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001522{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001523 struct drbd_request *req;
1524
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001525 /* Request object according to our peer */
1526 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001527 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001528 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001529 if (!missing_ok) {
1530 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1531 (unsigned long)id, (unsigned long long)sector);
1532 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001533 return NULL;
1534}
1535
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001536static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1537 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538{
1539 struct drbd_request *req;
1540 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001542 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543
1544 sector = be64_to_cpu(p->sector);
1545
Philipp Reisner87eeee42011-01-19 14:16:30 +01001546 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001547 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001548 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001549 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001550 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001551
Bart Van Assche24c48302011-05-21 18:32:29 +02001552 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553 * special casing it there for the various failure cases.
1554 * still no race with drbd_fail_pending_reads */
1555 ok = recv_dless_read(mdev, req, sector, data_size);
1556
1557 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001558 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 /* else: nothing. handled from drbd_disconnect...
1560 * I don't think we may complete this just yet
1561 * in case we are "on-disconnect: freeze" */
1562
1563 return ok;
1564}
1565
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001566static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1567 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568{
1569 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001571 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001572
1573 sector = be64_to_cpu(p->sector);
1574 D_ASSERT(p->block_id == ID_SYNCER);
1575
1576 if (get_ldev(mdev)) {
1577 /* data is submitted to disk within recv_resync_read.
1578 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001579 * or in drbd_peer_request_endio. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580 ok = recv_resync_read(mdev, sector, data_size);
1581 } else {
1582 if (__ratelimit(&drbd_ratelimit_state))
1583 dev_err(DEV, "Can not write resync data to local disk.\n");
1584
1585 ok = drbd_drain_block(mdev, data_size);
1586
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001587 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588 }
1589
Philipp Reisner778f2712010-07-06 11:14:00 +02001590 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1591
Philipp Reisnerb411b362009-09-25 16:07:19 -07001592 return ok;
1593}
1594
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001595static int w_restart_write(struct drbd_work *w, int cancel)
1596{
1597 struct drbd_request *req = container_of(w, struct drbd_request, w);
1598 struct drbd_conf *mdev = w->mdev;
1599 struct bio *bio;
1600 unsigned long start_time;
1601 unsigned long flags;
1602
1603 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1604 if (!expect(req->rq_state & RQ_POSTPONED)) {
1605 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1606 return 0;
1607 }
1608 bio = req->master_bio;
1609 start_time = req->start_time;
1610 /* Postponed requests will not have their master_bio completed! */
1611 __req_mod(req, DISCARD_WRITE, NULL);
1612 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1613
1614 while (__drbd_make_request(mdev, bio, start_time))
1615 /* retry */ ;
1616 return 1;
1617}
1618
1619static void restart_conflicting_writes(struct drbd_conf *mdev,
1620 sector_t sector, int size)
1621{
1622 struct drbd_interval *i;
1623 struct drbd_request *req;
1624
1625 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1626 if (!i->local)
1627 continue;
1628 req = container_of(i, struct drbd_request, i);
1629 if (req->rq_state & RQ_LOCAL_PENDING ||
1630 !(req->rq_state & RQ_POSTPONED))
1631 continue;
1632 if (expect(list_empty(&req->w.list))) {
1633 req->w.mdev = mdev;
1634 req->w.cb = w_restart_write;
1635 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1636 }
1637 }
1638}
1639
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640/* e_end_block() is called via drbd_process_done_ee().
1641 * this means this function only runs in the asender thread
1642 */
Philipp Reisner00d56942011-02-09 18:09:48 +01001643static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001645 struct drbd_peer_request *peer_req =
1646 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001647 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001648 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649 int ok = 1, pcmd;
1650
Philipp Reisner89e58e72011-01-19 13:12:45 +01001651 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001652 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001653 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1654 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001655 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001657 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001659 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001661 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 /* we expect it to be marked out of sync anyways...
1663 * maybe assert this? */
1664 }
1665 dec_unacked(mdev);
1666 }
1667 /* we delete from the conflict detection hash _after_ we sent out the
1668 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001669 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001670 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001671 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1672 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001673 if (peer_req->flags & EE_RESTART_REQUESTS)
1674 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001675 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001676 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001677 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001679 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680
1681 return ok;
1682}
1683
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001684static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001686 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001687 struct drbd_peer_request *peer_req =
1688 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001689 int ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001691 ok = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692 dec_unacked(mdev);
1693
1694 return ok;
1695}
1696
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001697static int e_send_discard_write(struct drbd_work *w, int unused)
1698{
1699 return e_send_ack(w, P_DISCARD_WRITE);
1700}
1701
1702static int e_send_retry_write(struct drbd_work *w, int unused)
1703{
1704 struct drbd_tconn *tconn = w->mdev->tconn;
1705
1706 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1707 P_RETRY_WRITE : P_DISCARD_WRITE);
1708}
1709
/*
 * Compare two sequence numbers with 32-bit wrap-around semantics.
 *
 * Returns true if @a is "after" @b, i.e. if the distance from @b to @a,
 * taken modulo 2^32 and read as a signed 32-bit value, is positive.
 *
 * The subtraction is done on the unsigned operands, where wrap-around is
 * well defined, and only the difference is reinterpreted as signed.
 * Subtracting two values that were each cast to s32 first would be a signed
 * overflow for operands far apart (e.g. 0x80000000 and 0x7fffffff).
 *
 * For 24-bit wrap-around both operands would have to be shifted first:
 *   a <<= 8; b <<= 8;
 */
static bool seq_greater(u32 a, u32 b)
{
	return (s32)(a - b) > 0;
}
1719
1720static u32 seq_max(u32 a, u32 b)
1721{
1722 return seq_greater(a, b) ? a : b;
1723}
1724
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001725static bool need_peer_seq(struct drbd_conf *mdev)
1726{
1727 struct drbd_tconn *tconn = mdev->tconn;
1728
1729 /*
1730 * We only need to keep track of the last packet_seq number of our peer
1731 * if we are in dual-primary mode and we have the discard flag set; see
1732 * handle_write_conflicts().
1733 */
1734 return tconn->net_conf->two_primaries &&
1735 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1736}
1737
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001738static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001739{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001740 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001741
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001742 if (need_peer_seq(mdev)) {
1743 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001744 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1745 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001746 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001747 /* wake up only if we actually changed mdev->peer_seq */
1748 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001749 wake_up(&mdev->seq_wait);
1750 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001751}
1752
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753/* Called from receive_Data.
1754 * Synchronize packets on sock with packets on msock.
1755 *
1756 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1757 * packet traveling on msock, they are still processed in the order they have
1758 * been sent.
1759 *
1760 * Note: we don't care for Ack packets overtaking P_DATA packets.
1761 *
1762 * In case packet_seq is larger than mdev->peer_seq number, there are
1763 * outstanding packets on the msock. We wait for them to arrive.
1764 * In case we are the logically next packet, we update mdev->peer_seq
1765 * ourselves. Correctly handles 32bit wrap around.
1766 *
1767 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1768 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1769 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1770 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1771 *
1772 * returns 0 if we may process the packet,
1773 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001774static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775{
1776 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001778 int ret;
1779
1780 if (!need_peer_seq(mdev))
1781 return 0;
1782
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783 spin_lock(&mdev->peer_seq_lock);
1784 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001785 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1786 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1787 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001789 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790 if (signal_pending(current)) {
1791 ret = -ERESTARTSYS;
1792 break;
1793 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001794 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001796 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1797 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001798 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001799 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001801 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802 break;
1803 }
1804 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001806 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001807 return ret;
1808}
1809
Lars Ellenberg688593c2010-11-17 22:25:03 +01001810/* see also bio_flags_to_wire()
1811 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1812 * flags and back. We may replicate to other kernel versions. */
1813static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001814{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001815 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1816 (dpf & DP_FUA ? REQ_FUA : 0) |
1817 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1818 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001819}
1820
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001821static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1822 unsigned int size)
1823{
1824 struct drbd_interval *i;
1825
1826 repeat:
1827 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1828 struct drbd_request *req;
1829 struct bio_and_error m;
1830
1831 if (!i->local)
1832 continue;
1833 req = container_of(i, struct drbd_request, i);
1834 if (!(req->rq_state & RQ_POSTPONED))
1835 continue;
1836 req->rq_state &= ~RQ_POSTPONED;
1837 __req_mod(req, NEG_ACKED, &m);
1838 spin_unlock_irq(&mdev->tconn->req_lock);
1839 if (m.bio)
1840 complete_master_bio(mdev, &m);
1841 spin_lock_irq(&mdev->tconn->req_lock);
1842 goto repeat;
1843 }
1844}
1845
1846static int handle_write_conflicts(struct drbd_conf *mdev,
1847 struct drbd_peer_request *peer_req)
1848{
1849 struct drbd_tconn *tconn = mdev->tconn;
1850 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1851 sector_t sector = peer_req->i.sector;
1852 const unsigned int size = peer_req->i.size;
1853 struct drbd_interval *i;
1854 bool equal;
1855 int err;
1856
1857 /*
1858 * Inserting the peer request into the write_requests tree will prevent
1859 * new conflicting local requests from being added.
1860 */
1861 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1862
1863 repeat:
1864 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1865 if (i == &peer_req->i)
1866 continue;
1867
1868 if (!i->local) {
1869 /*
1870 * Our peer has sent a conflicting remote request; this
1871 * should not happen in a two-node setup. Wait for the
1872 * earlier peer request to complete.
1873 */
1874 err = drbd_wait_misc(mdev, i);
1875 if (err)
1876 goto out;
1877 goto repeat;
1878 }
1879
1880 equal = i->sector == sector && i->size == size;
1881 if (resolve_conflicts) {
1882 /*
1883 * If the peer request is fully contained within the
1884 * overlapping request, it can be discarded; otherwise,
1885 * it will be retried once all overlapping requests
1886 * have completed.
1887 */
1888 bool discard = i->sector <= sector && i->sector +
1889 (i->size >> 9) >= sector + (size >> 9);
1890
1891 if (!equal)
1892 dev_alert(DEV, "Concurrent writes detected: "
1893 "local=%llus +%u, remote=%llus +%u, "
1894 "assuming %s came first\n",
1895 (unsigned long long)i->sector, i->size,
1896 (unsigned long long)sector, size,
1897 discard ? "local" : "remote");
1898
1899 inc_unacked(mdev);
1900 peer_req->w.cb = discard ? e_send_discard_write :
1901 e_send_retry_write;
1902 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1903 wake_asender(mdev->tconn);
1904
1905 err = -ENOENT;
1906 goto out;
1907 } else {
1908 struct drbd_request *req =
1909 container_of(i, struct drbd_request, i);
1910
1911 if (!equal)
1912 dev_alert(DEV, "Concurrent writes detected: "
1913 "local=%llus +%u, remote=%llus +%u\n",
1914 (unsigned long long)i->sector, i->size,
1915 (unsigned long long)sector, size);
1916
1917 if (req->rq_state & RQ_LOCAL_PENDING ||
1918 !(req->rq_state & RQ_POSTPONED)) {
1919 /*
1920 * Wait for the node with the discard flag to
1921 * decide if this request will be discarded or
1922 * retried. Requests that are discarded will
1923 * disappear from the write_requests tree.
1924 *
1925 * In addition, wait for the conflicting
1926 * request to finish locally before submitting
1927 * the conflicting peer request.
1928 */
1929 err = drbd_wait_misc(mdev, &req->i);
1930 if (err) {
1931 _conn_request_state(mdev->tconn,
1932 NS(conn, C_TIMEOUT),
1933 CS_HARD);
1934 fail_postponed_requests(mdev, sector, size);
1935 goto out;
1936 }
1937 goto repeat;
1938 }
1939 /*
1940 * Remember to restart the conflicting requests after
1941 * the new peer request has completed.
1942 */
1943 peer_req->flags |= EE_RESTART_REQUESTS;
1944 }
1945 }
1946 err = 0;
1947
1948 out:
1949 if (err)
1950 drbd_remove_epoch_entry_interval(mdev, peer_req);
1951 return err;
1952}
1953
Philipp Reisnerb411b362009-09-25 16:07:19 -07001954/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001955static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1956 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001957{
1958 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001959 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001960 struct p_data *p = &mdev->tconn->data.rbuf.data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001961 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001962 int rw = WRITE;
1963 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001964 int err;
1965
Philipp Reisnerb411b362009-09-25 16:07:19 -07001966
Philipp Reisnerb411b362009-09-25 16:07:19 -07001967 if (!get_ldev(mdev)) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001968 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001969 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001970 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001971 return drbd_drain_block(mdev, data_size) && err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 }
1973
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001974 /*
1975 * Corresponding put_ldev done either below (on various errors), or in
1976 * drbd_peer_request_endio, if we successfully submit the data at the
1977 * end of this function.
1978 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001979
1980 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001981 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1982 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001983 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001984 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 }
1986
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001987 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001988
Lars Ellenberg688593c2010-11-17 22:25:03 +01001989 dp_flags = be32_to_cpu(p->dp_flags);
1990 rw |= wire_flags_to_bio(mdev, dp_flags);
1991
1992 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001993 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001994
Philipp Reisnerb411b362009-09-25 16:07:19 -07001995 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001996 peer_req->epoch = mdev->current_epoch;
1997 atomic_inc(&peer_req->epoch->epoch_size);
1998 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001999 spin_unlock(&mdev->epoch_lock);
2000
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002001 if (mdev->tconn->net_conf->two_primaries) {
2002 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2003 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002004 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002005 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002006 err = handle_write_conflicts(mdev, peer_req);
2007 if (err) {
2008 spin_unlock_irq(&mdev->tconn->req_lock);
2009 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002010 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002011 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002013 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002015 } else
2016 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002017 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002018 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019
Philipp Reisner89e58e72011-01-19 13:12:45 +01002020 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 case DRBD_PROT_C:
2022 inc_unacked(mdev);
2023 /* corresponding dec_unacked() in e_end_block()
2024 * respective _drbd_clear_done_ee */
2025 break;
2026 case DRBD_PROT_B:
2027 /* I really don't like it that the receiver thread
2028 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002029 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002030 break;
2031 case DRBD_PROT_A:
2032 /* nothing to do */
2033 break;
2034 }
2035
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002036 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002037 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002038 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2039 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2040 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2041 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042 }
2043
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002044 if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002045 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002046
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002047 /* don't care for the reason here */
2048 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002049 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002050 list_del(&peer_req->w.list);
2051 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002052 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002053 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
2054 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002055
Philipp Reisnerb411b362009-09-25 16:07:19 -07002056out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002057 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002058 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002059 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002060 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061}
2062
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002063/* We may throttle resync, if the lower device seems to be busy,
2064 * and current sync rate is above c_min_rate.
2065 *
2066 * To decide whether or not the lower device is busy, we use a scheme similar
2067 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2068 * (more than 64 sectors) of activity we cannot account for with our own resync
2069 * activity, it obviously is "busy".
2070 *
2071 * The current sync rate used here uses only the most recent two step marks,
2072 * to have a short time average so we can react faster.
2073 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002074int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002075{
2076 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2077 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002078 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002079 int curr_events;
2080 int throttle = 0;
2081
2082 /* feature disabled? */
2083 if (mdev->sync_conf.c_min_rate == 0)
2084 return 0;
2085
Philipp Reisnere3555d82010-11-07 15:56:29 +01002086 spin_lock_irq(&mdev->al_lock);
2087 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2088 if (tmp) {
2089 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2090 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2091 spin_unlock_irq(&mdev->al_lock);
2092 return 0;
2093 }
2094 /* Do not slow down if app IO is already waiting for this extent */
2095 }
2096 spin_unlock_irq(&mdev->al_lock);
2097
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002098 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2099 (int)part_stat_read(&disk->part0, sectors[1]) -
2100 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002101
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002102 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2103 unsigned long rs_left;
2104 int i;
2105
2106 mdev->rs_last_events = curr_events;
2107
2108 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2109 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002110 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2111
2112 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2113 rs_left = mdev->ov_left;
2114 else
2115 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002116
2117 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2118 if (!dt)
2119 dt++;
2120 db = mdev->rs_mark_left[i] - rs_left;
2121 dbdt = Bit2KB(db/dt);
2122
2123 if (dbdt > mdev->sync_conf.c_min_rate)
2124 throttle = 1;
2125 }
2126 return throttle;
2127}
2128
2129
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002130static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2131 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002132{
2133 sector_t sector;
2134 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002135 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002136 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002137 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002139 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002140
2141 sector = be64_to_cpu(p->sector);
2142 size = be32_to_cpu(p->blksize);
2143
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002144 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2146 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002147 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148 }
2149 if (sector + (size>>9) > capacity) {
2150 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2151 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002152 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153 }
2154
2155 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002156 verb = 1;
2157 switch (cmd) {
2158 case P_DATA_REQUEST:
2159 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2160 break;
2161 case P_RS_DATA_REQUEST:
2162 case P_CSUM_RS_REQUEST:
2163 case P_OV_REQUEST:
2164 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2165 break;
2166 case P_OV_REPLY:
2167 verb = 0;
2168 dec_rs_pending(mdev);
2169 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2170 break;
2171 default:
2172 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2173 cmdname(cmd));
2174 }
2175 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002176 dev_err(DEV, "Can not satisfy peer's read request, "
2177 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002178
Lars Ellenberga821cc42010-09-06 12:31:37 +02002179 /* drain possibly payload */
2180 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181 }
2182
2183 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2184 * "criss-cross" setup, that might cause write-out on some other DRBD,
2185 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002186 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2187 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002188 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002189 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190 }
2191
Philipp Reisner02918be2010-08-20 14:35:10 +02002192 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002194 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002196 /* application IO, don't drbd_rs_begin_io */
2197 goto submit;
2198
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002200 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002201 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002202 /* used in the sector offset progress display */
2203 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002204 break;
2205
2206 case P_OV_REPLY:
2207 case P_CSUM_RS_REQUEST:
2208 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2210 if (!di)
2211 goto out_free_e;
2212
2213 di->digest_size = digest_size;
2214 di->digest = (((char *)di)+sizeof(struct digest_info));
2215
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002216 peer_req->digest = di;
2217 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002218
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002219 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220 goto out_free_e;
2221
Philipp Reisner02918be2010-08-20 14:35:10 +02002222 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002223 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002224 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002225 /* used in the sector offset progress display */
2226 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002227 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002228 /* track progress, we may need to throttle */
2229 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002230 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002232 /* drbd_rs_begin_io done when we sent this request,
2233 * but accounting still needs to be done. */
2234 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002235 }
2236 break;
2237
2238 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002239 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002240 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002241 unsigned long now = jiffies;
2242 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002243 mdev->ov_start_sector = sector;
2244 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002245 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2246 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002247 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2248 mdev->rs_mark_left[i] = mdev->ov_left;
2249 mdev->rs_mark_time[i] = now;
2250 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002251 dev_info(DEV, "Online Verify start sector: %llu\n",
2252 (unsigned long long)sector);
2253 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002254 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002256 break;
2257
Philipp Reisnerb411b362009-09-25 16:07:19 -07002258 default:
2259 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002260 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002261 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002262 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002263 }
2264
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002265 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2266 * wrt the receiver, but it is not as straightforward as it may seem.
2267 * Various places in the resync start and stop logic assume resync
2268 * requests are processed in order, requeuing this on the worker thread
2269 * introduces a bunch of new code for synchronization between threads.
2270 *
2271 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2272 * "forever", throttling after drbd_rs_begin_io will lock that extent
2273 * for application writes for the same time. For now, just throttle
2274 * here, where the rest of the code expects the receiver to sleep for
2275 * a while, anyways.
2276 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002277
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002278 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2279 * this defers syncer requests for some time, before letting at least
2280 * on request through. The resync controller on the receiving side
2281 * will adapt to the incoming rate accordingly.
2282 *
2283 * We cannot throttle here if remote is Primary/SyncTarget:
2284 * we would also throttle its application reads.
2285 * In that case, throttling is done on the SyncTarget only.
2286 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002287 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2288 schedule_timeout_uninterruptible(HZ/10);
2289 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002290 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002291
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002292submit_for_resync:
2293 atomic_add(size >> 9, &mdev->rs_sect_ev);
2294
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002295submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002296 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002297 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002298 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002299 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002300
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002301 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002302 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002303
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002304 /* don't care for the reason here */
2305 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002306 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002307 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002308 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002309 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2310
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002312 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002313 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002314 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002315}
2316
2317static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2318{
2319 int self, peer, rv = -100;
2320 unsigned long ch_self, ch_peer;
2321
2322 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2323 peer = mdev->p_uuid[UI_BITMAP] & 1;
2324
2325 ch_peer = mdev->p_uuid[UI_SIZE];
2326 ch_self = mdev->comm_bm_set;
2327
Philipp Reisner89e58e72011-01-19 13:12:45 +01002328 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329 case ASB_CONSENSUS:
2330 case ASB_DISCARD_SECONDARY:
2331 case ASB_CALL_HELPER:
2332 dev_err(DEV, "Configuration error.\n");
2333 break;
2334 case ASB_DISCONNECT:
2335 break;
2336 case ASB_DISCARD_YOUNGER_PRI:
2337 if (self == 0 && peer == 1) {
2338 rv = -1;
2339 break;
2340 }
2341 if (self == 1 && peer == 0) {
2342 rv = 1;
2343 break;
2344 }
2345 /* Else fall through to one of the other strategies... */
2346 case ASB_DISCARD_OLDER_PRI:
2347 if (self == 0 && peer == 1) {
2348 rv = 1;
2349 break;
2350 }
2351 if (self == 1 && peer == 0) {
2352 rv = -1;
2353 break;
2354 }
2355 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002356 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357 "Using discard-least-changes instead\n");
2358 case ASB_DISCARD_ZERO_CHG:
2359 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002360 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002361 ? -1 : 1;
2362 break;
2363 } else {
2364 if (ch_peer == 0) { rv = 1; break; }
2365 if (ch_self == 0) { rv = -1; break; }
2366 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002367 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368 break;
2369 case ASB_DISCARD_LEAST_CHG:
2370 if (ch_self < ch_peer)
2371 rv = -1;
2372 else if (ch_self > ch_peer)
2373 rv = 1;
2374 else /* ( ch_self == ch_peer ) */
2375 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002376 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002377 ? -1 : 1;
2378 break;
2379 case ASB_DISCARD_LOCAL:
2380 rv = -1;
2381 break;
2382 case ASB_DISCARD_REMOTE:
2383 rv = 1;
2384 }
2385
2386 return rv;
2387}
2388
2389static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2390{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002391 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002392
Philipp Reisner89e58e72011-01-19 13:12:45 +01002393 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 case ASB_DISCARD_YOUNGER_PRI:
2395 case ASB_DISCARD_OLDER_PRI:
2396 case ASB_DISCARD_LEAST_CHG:
2397 case ASB_DISCARD_LOCAL:
2398 case ASB_DISCARD_REMOTE:
2399 dev_err(DEV, "Configuration error.\n");
2400 break;
2401 case ASB_DISCONNECT:
2402 break;
2403 case ASB_CONSENSUS:
2404 hg = drbd_asb_recover_0p(mdev);
2405 if (hg == -1 && mdev->state.role == R_SECONDARY)
2406 rv = hg;
2407 if (hg == 1 && mdev->state.role == R_PRIMARY)
2408 rv = hg;
2409 break;
2410 case ASB_VIOLENTLY:
2411 rv = drbd_asb_recover_0p(mdev);
2412 break;
2413 case ASB_DISCARD_SECONDARY:
2414 return mdev->state.role == R_PRIMARY ? 1 : -1;
2415 case ASB_CALL_HELPER:
2416 hg = drbd_asb_recover_0p(mdev);
2417 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002418 enum drbd_state_rv rv2;
2419
2420 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002421 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2422 * we might be here in C_WF_REPORT_PARAMS which is transient.
2423 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002424 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2425 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002426 drbd_khelper(mdev, "pri-lost-after-sb");
2427 } else {
2428 dev_warn(DEV, "Successfully gave up primary role.\n");
2429 rv = hg;
2430 }
2431 } else
2432 rv = hg;
2433 }
2434
2435 return rv;
2436}
2437
2438static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2439{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002440 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002441
Philipp Reisner89e58e72011-01-19 13:12:45 +01002442 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002443 case ASB_DISCARD_YOUNGER_PRI:
2444 case ASB_DISCARD_OLDER_PRI:
2445 case ASB_DISCARD_LEAST_CHG:
2446 case ASB_DISCARD_LOCAL:
2447 case ASB_DISCARD_REMOTE:
2448 case ASB_CONSENSUS:
2449 case ASB_DISCARD_SECONDARY:
2450 dev_err(DEV, "Configuration error.\n");
2451 break;
2452 case ASB_VIOLENTLY:
2453 rv = drbd_asb_recover_0p(mdev);
2454 break;
2455 case ASB_DISCONNECT:
2456 break;
2457 case ASB_CALL_HELPER:
2458 hg = drbd_asb_recover_0p(mdev);
2459 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002460 enum drbd_state_rv rv2;
2461
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2463 * we might be here in C_WF_REPORT_PARAMS which is transient.
2464 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002465 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2466 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467 drbd_khelper(mdev, "pri-lost-after-sb");
2468 } else {
2469 dev_warn(DEV, "Successfully gave up primary role.\n");
2470 rv = hg;
2471 }
2472 } else
2473 rv = hg;
2474 }
2475
2476 return rv;
2477}
2478
2479static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2480 u64 bits, u64 flags)
2481{
2482 if (!uuid) {
2483 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2484 return;
2485 }
2486 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2487 text,
2488 (unsigned long long)uuid[UI_CURRENT],
2489 (unsigned long long)uuid[UI_BITMAP],
2490 (unsigned long long)uuid[UI_HISTORY_START],
2491 (unsigned long long)uuid[UI_HISTORY_END],
2492 (unsigned long long)bits,
2493 (unsigned long long)flags);
2494}
2495
2496/*
2497 100 after split brain try auto recover
2498 2 C_SYNC_SOURCE set BitMap
2499 1 C_SYNC_SOURCE use BitMap
2500 0 no Sync
2501 -1 C_SYNC_TARGET use BitMap
2502 -2 C_SYNC_TARGET set BitMap
2503 -100 after split brain, disconnect
2504-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002505-1091 requires proto 91
2506-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002507 */
2508static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2509{
2510 u64 self, peer;
2511 int i, j;
2512
2513 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2514 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2515
2516 *rule_nr = 10;
2517 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2518 return 0;
2519
2520 *rule_nr = 20;
2521 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2522 peer != UUID_JUST_CREATED)
2523 return -2;
2524
2525 *rule_nr = 30;
2526 if (self != UUID_JUST_CREATED &&
2527 (peer == UUID_JUST_CREATED || peer == (u64)0))
2528 return 2;
2529
2530 if (self == peer) {
2531 int rct, dc; /* roles at crash time */
2532
2533 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2534
Philipp Reisner31890f42011-01-19 14:12:51 +01002535 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002536 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537
2538 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2539 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2540 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2541 drbd_uuid_set_bm(mdev, 0UL);
2542
2543 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2544 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2545 *rule_nr = 34;
2546 } else {
2547 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2548 *rule_nr = 36;
2549 }
2550
2551 return 1;
2552 }
2553
2554 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2555
Philipp Reisner31890f42011-01-19 14:12:51 +01002556 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002557 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002558
2559 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2560 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2561 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2562
2563 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2564 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2565 mdev->p_uuid[UI_BITMAP] = 0UL;
2566
2567 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2568 *rule_nr = 35;
2569 } else {
2570 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2571 *rule_nr = 37;
2572 }
2573
2574 return -1;
2575 }
2576
2577 /* Common power [off|failure] */
2578 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2579 (mdev->p_uuid[UI_FLAGS] & 2);
2580 /* lowest bit is set when we were primary,
2581 * next bit (weight 2) is set when peer was primary */
2582 *rule_nr = 40;
2583
2584 switch (rct) {
2585 case 0: /* !self_pri && !peer_pri */ return 0;
2586 case 1: /* self_pri && !peer_pri */ return 1;
2587 case 2: /* !self_pri && peer_pri */ return -1;
2588 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002589 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002590 return dc ? -1 : 1;
2591 }
2592 }
2593
2594 *rule_nr = 50;
2595 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2596 if (self == peer)
2597 return -1;
2598
2599 *rule_nr = 51;
2600 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2601 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002602 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002603 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2604 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2605 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002606 /* The last P_SYNC_UUID did not get though. Undo the last start of
2607 resync as sync source modifications of the peer's UUIDs. */
2608
Philipp Reisner31890f42011-01-19 14:12:51 +01002609 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002610 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611
2612 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2613 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002614
2615 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2616 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2617
Philipp Reisnerb411b362009-09-25 16:07:19 -07002618 return -1;
2619 }
2620 }
2621
2622 *rule_nr = 60;
2623 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2624 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2625 peer = mdev->p_uuid[i] & ~((u64)1);
2626 if (self == peer)
2627 return -2;
2628 }
2629
2630 *rule_nr = 70;
2631 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2632 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2633 if (self == peer)
2634 return 1;
2635
2636 *rule_nr = 71;
2637 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2638 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002639 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002640 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2641 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2642 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643 /* The last P_SYNC_UUID did not get though. Undo the last start of
2644 resync as sync source modifications of our UUIDs. */
2645
Philipp Reisner31890f42011-01-19 14:12:51 +01002646 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002647 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648
2649 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2650 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2651
Philipp Reisner4a23f262011-01-11 17:42:17 +01002652 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2654 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2655
2656 return 1;
2657 }
2658 }
2659
2660
2661 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002662 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002663 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2664 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2665 if (self == peer)
2666 return 2;
2667 }
2668
2669 *rule_nr = 90;
2670 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2671 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2672 if (self == peer && self != ((u64)0))
2673 return 100;
2674
2675 *rule_nr = 100;
2676 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2677 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2678 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2679 peer = mdev->p_uuid[j] & ~((u64)1);
2680 if (self == peer)
2681 return -100;
2682 }
2683 }
2684
2685 return -1000;
2686}
2687
2688/* drbd_sync_handshake() returns the new conn state on success, or
2689 CONN_MASK (-1) on failure.
2690 */
2691static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2692 enum drbd_disk_state peer_disk) __must_hold(local)
2693{
2694 int hg, rule_nr;
2695 enum drbd_conns rv = C_MASK;
2696 enum drbd_disk_state mydisk;
2697
2698 mydisk = mdev->state.disk;
2699 if (mydisk == D_NEGOTIATING)
2700 mydisk = mdev->new_state_tmp.disk;
2701
2702 dev_info(DEV, "drbd_sync_handshake:\n");
2703 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2704 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2705 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2706
2707 hg = drbd_uuid_compare(mdev, &rule_nr);
2708
2709 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2710
2711 if (hg == -1000) {
2712 dev_alert(DEV, "Unrelated data, aborting!\n");
2713 return C_MASK;
2714 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002715 if (hg < -1000) {
2716 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002717 return C_MASK;
2718 }
2719
2720 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2721 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2722 int f = (hg == -100) || abs(hg) == 2;
2723 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2724 if (f)
2725 hg = hg*2;
2726 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2727 hg > 0 ? "source" : "target");
2728 }
2729
Adam Gandelman3a11a482010-04-08 16:48:23 -07002730 if (abs(hg) == 100)
2731 drbd_khelper(mdev, "initial-split-brain");
2732
Philipp Reisner89e58e72011-01-19 13:12:45 +01002733 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734 int pcount = (mdev->state.role == R_PRIMARY)
2735 + (peer_role == R_PRIMARY);
2736 int forced = (hg == -100);
2737
2738 switch (pcount) {
2739 case 0:
2740 hg = drbd_asb_recover_0p(mdev);
2741 break;
2742 case 1:
2743 hg = drbd_asb_recover_1p(mdev);
2744 break;
2745 case 2:
2746 hg = drbd_asb_recover_2p(mdev);
2747 break;
2748 }
2749 if (abs(hg) < 100) {
2750 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2751 "automatically solved. Sync from %s node\n",
2752 pcount, (hg < 0) ? "peer" : "this");
2753 if (forced) {
2754 dev_warn(DEV, "Doing a full sync, since"
2755 " UUIDs where ambiguous.\n");
2756 hg = hg*2;
2757 }
2758 }
2759 }
2760
2761 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002762 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002764 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002765 hg = 1;
2766
2767 if (abs(hg) < 100)
2768 dev_warn(DEV, "Split-Brain detected, manually solved. "
2769 "Sync from %s node\n",
2770 (hg < 0) ? "peer" : "this");
2771 }
2772
2773 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002774 /* FIXME this log message is not correct if we end up here
2775 * after an attempted attach on a diskless node.
2776 * We just refuse to attach -- well, we drop the "connection"
2777 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002778 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002779 drbd_khelper(mdev, "split-brain");
2780 return C_MASK;
2781 }
2782
2783 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2784 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2785 return C_MASK;
2786 }
2787
2788 if (hg < 0 && /* by intention we do not use mydisk here. */
2789 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002790 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791 case ASB_CALL_HELPER:
2792 drbd_khelper(mdev, "pri-lost");
2793 /* fall through */
2794 case ASB_DISCONNECT:
2795 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2796 return C_MASK;
2797 case ASB_VIOLENTLY:
2798 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2799 "assumption\n");
2800 }
2801 }
2802
Philipp Reisner8169e412011-03-15 18:40:27 +01002803 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002804 if (hg == 0)
2805 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2806 else
2807 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2808 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2809 abs(hg) >= 2 ? "full" : "bit-map based");
2810 return C_MASK;
2811 }
2812
Philipp Reisnerb411b362009-09-25 16:07:19 -07002813 if (abs(hg) >= 2) {
2814 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002815 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2816 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 return C_MASK;
2818 }
2819
2820 if (hg > 0) { /* become sync source. */
2821 rv = C_WF_BITMAP_S;
2822 } else if (hg < 0) { /* become sync target */
2823 rv = C_WF_BITMAP_T;
2824 } else {
2825 rv = C_CONNECTED;
2826 if (drbd_bm_total_weight(mdev)) {
2827 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2828 drbd_bm_total_weight(mdev));
2829 }
2830 }
2831
2832 return rv;
2833}
2834
2835/* returns 1 if invalid */
2836static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2837{
2838 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2839 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2840 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2841 return 0;
2842
2843 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2844 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2845 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2846 return 1;
2847
2848 /* everything else is valid if they are equal on both sides. */
2849 if (peer == self)
2850 return 0;
2851
2852 /* everything es is invalid. */
2853 return 1;
2854}
2855
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002856static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2857 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002859 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002860 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002861 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002862 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2863
Philipp Reisnerb411b362009-09-25 16:07:19 -07002864 p_proto = be32_to_cpu(p->protocol);
2865 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2866 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2867 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002869 cf = be32_to_cpu(p->conn_flags);
2870 p_want_lose = cf & CF_WANT_LOSE;
2871
Philipp Reisner8169e412011-03-15 18:40:27 +01002872 clear_bit(CONN_DRY_RUN, &mdev->tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002873
2874 if (cf & CF_DRY_RUN)
Philipp Reisner8169e412011-03-15 18:40:27 +01002875 set_bit(CONN_DRY_RUN, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002876
Philipp Reisner89e58e72011-01-19 13:12:45 +01002877 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002878 dev_err(DEV, "incompatible communication protocols\n");
2879 goto disconnect;
2880 }
2881
Philipp Reisner89e58e72011-01-19 13:12:45 +01002882 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002883 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2884 goto disconnect;
2885 }
2886
Philipp Reisner89e58e72011-01-19 13:12:45 +01002887 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2889 goto disconnect;
2890 }
2891
Philipp Reisner89e58e72011-01-19 13:12:45 +01002892 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002893 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2894 goto disconnect;
2895 }
2896
Philipp Reisner89e58e72011-01-19 13:12:45 +01002897 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2899 goto disconnect;
2900 }
2901
Philipp Reisner89e58e72011-01-19 13:12:45 +01002902 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002903 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2904 goto disconnect;
2905 }
2906
Philipp Reisner31890f42011-01-19 14:12:51 +01002907 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002908 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002910 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002911 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002912
2913 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2914 if (strcmp(p_integrity_alg, my_alg)) {
2915 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2916 goto disconnect;
2917 }
2918 dev_info(DEV, "data-integrity-alg: %s\n",
2919 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2920 }
2921
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002922 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923
2924disconnect:
Philipp Reisner38fa9982011-03-15 18:24:49 +01002925 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002926 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002927}
2928
2929/* helper function
2930 * input: alg name, feature name
2931 * return: NULL (alg name was "")
2932 * ERR_PTR(error) if something goes wrong
2933 * or the crypto hash ptr, if it worked out ok. */
2934struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2935 const char *alg, const char *name)
2936{
2937 struct crypto_hash *tfm;
2938
2939 if (!alg[0])
2940 return NULL;
2941
2942 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2943 if (IS_ERR(tfm)) {
2944 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2945 alg, name, PTR_ERR(tfm));
2946 return tfm;
2947 }
2948 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2949 crypto_free_hash(tfm);
2950 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2951 return ERR_PTR(-EINVAL);
2952 }
2953 return tfm;
2954}
2955
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002956static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2957 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002959 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002960 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002961 unsigned int header_size, data_size, exp_max_sz;
2962 struct crypto_hash *verify_tfm = NULL;
2963 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002964 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002965 int *rs_plan_s = NULL;
2966 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002967
2968 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2969 : apv == 88 ? sizeof(struct p_rs_param)
2970 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002971 : apv <= 94 ? sizeof(struct p_rs_param_89)
2972 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973
Philipp Reisner02918be2010-08-20 14:35:10 +02002974 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002975 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002976 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002977 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978 }
2979
2980 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002981 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002982 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002983 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002984 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002985 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002987 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002988 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002989 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002990 D_ASSERT(data_size == 0);
2991 }
2992
2993 /* initialize verify_alg and csums_alg */
2994 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2995
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002996 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002997 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002998
2999 mdev->sync_conf.rate = be32_to_cpu(p->rate);
3000
3001 if (apv >= 88) {
3002 if (apv == 88) {
3003 if (data_size > SHARED_SECRET_MAX) {
3004 dev_err(DEV, "verify-alg too long, "
3005 "peer wants %u, accepting only %u byte\n",
3006 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003007 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 }
3009
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003010 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003011 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012
3013 /* we expect NUL terminated string */
3014 /* but just in case someone tries to be evil */
3015 D_ASSERT(p->verify_alg[data_size-1] == 0);
3016 p->verify_alg[data_size-1] = 0;
3017
3018 } else /* apv >= 89 */ {
3019 /* we still expect NUL terminated strings */
3020 /* but just in case someone tries to be evil */
3021 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3022 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3023 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3024 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3025 }
3026
3027 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
3028 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3029 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
3030 mdev->sync_conf.verify_alg, p->verify_alg);
3031 goto disconnect;
3032 }
3033 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3034 p->verify_alg, "verify-alg");
3035 if (IS_ERR(verify_tfm)) {
3036 verify_tfm = NULL;
3037 goto disconnect;
3038 }
3039 }
3040
3041 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
3042 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3043 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
3044 mdev->sync_conf.csums_alg, p->csums_alg);
3045 goto disconnect;
3046 }
3047 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3048 p->csums_alg, "csums-alg");
3049 if (IS_ERR(csums_tfm)) {
3050 csums_tfm = NULL;
3051 goto disconnect;
3052 }
3053 }
3054
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003055 if (apv > 94) {
3056 mdev->sync_conf.rate = be32_to_cpu(p->rate);
3057 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3058 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
3059 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
3060 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003061
3062 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3063 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3064 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3065 if (!rs_plan_s) {
3066 dev_err(DEV, "kmalloc of fifo_buffer failed");
3067 goto disconnect;
3068 }
3069 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003070 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071
3072 spin_lock(&mdev->peer_seq_lock);
3073 /* lock against drbd_nl_syncer_conf() */
3074 if (verify_tfm) {
3075 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
3076 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
3077 crypto_free_hash(mdev->verify_tfm);
3078 mdev->verify_tfm = verify_tfm;
3079 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3080 }
3081 if (csums_tfm) {
3082 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
3083 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
3084 crypto_free_hash(mdev->csums_tfm);
3085 mdev->csums_tfm = csums_tfm;
3086 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3087 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003088 if (fifo_size != mdev->rs_plan_s.size) {
3089 kfree(mdev->rs_plan_s.values);
3090 mdev->rs_plan_s.values = rs_plan_s;
3091 mdev->rs_plan_s.size = fifo_size;
3092 mdev->rs_planed = 0;
3093 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003094 spin_unlock(&mdev->peer_seq_lock);
3095 }
3096
3097 return ok;
3098disconnect:
3099 /* just for completeness: actually not needed,
3100 * as this is not reached if csums_tfm was ok. */
3101 crypto_free_hash(csums_tfm);
3102 /* but free the verify_tfm again, if csums_tfm did not work out */
3103 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003104 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003105 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106}
3107
Philipp Reisnerb411b362009-09-25 16:07:19 -07003108/* warn if the arguments differ by more than 12.5% */
3109static void warn_if_differ_considerably(struct drbd_conf *mdev,
3110 const char *s, sector_t a, sector_t b)
3111{
3112 sector_t d;
3113 if (a == 0 || b == 0)
3114 return;
3115 d = (a > b) ? (a - b) : (b - a);
3116 if (d > (a>>3) || d > (b>>3))
3117 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3118 (unsigned long long)a, (unsigned long long)b);
3119}
3120
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003121static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3122 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003124 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003125 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126 sector_t p_size, p_usize, my_usize;
3127 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003128 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129
Philipp Reisnerb411b362009-09-25 16:07:19 -07003130 p_size = be64_to_cpu(p->d_size);
3131 p_usize = be64_to_cpu(p->u_size);
3132
Philipp Reisnerb411b362009-09-25 16:07:19 -07003133 /* just store the peer's disk size for now.
3134 * we still need to figure out whether we accept that. */
3135 mdev->p_size = p_size;
3136
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137 if (get_ldev(mdev)) {
3138 warn_if_differ_considerably(mdev, "lower level device sizes",
3139 p_size, drbd_get_max_capacity(mdev->ldev));
3140 warn_if_differ_considerably(mdev, "user requested size",
3141 p_usize, mdev->ldev->dc.disk_size);
3142
3143 /* if this is the first connect, or an otherwise expected
3144 * param exchange, choose the minimum */
3145 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3146 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3147 p_usize);
3148
3149 my_usize = mdev->ldev->dc.disk_size;
3150
3151 if (mdev->ldev->dc.disk_size != p_usize) {
3152 mdev->ldev->dc.disk_size = p_usize;
3153 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3154 (unsigned long)mdev->ldev->dc.disk_size);
3155 }
3156
3157 /* Never shrink a device with usable data during connect.
3158 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003159 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003160 drbd_get_capacity(mdev->this_bdev) &&
3161 mdev->state.disk >= D_OUTDATED &&
3162 mdev->state.conn < C_CONNECTED) {
3163 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003164 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 mdev->ldev->dc.disk_size = my_usize;
3166 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003167 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168 }
3169 put_ldev(mdev);
3170 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171
Philipp Reisnere89b5912010-03-24 17:11:33 +01003172 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003174 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003175 put_ldev(mdev);
3176 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003177 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003178 drbd_md_sync(mdev);
3179 } else {
3180 /* I am diskless, need to accept the peer's size. */
3181 drbd_set_my_capacity(mdev, p_size);
3182 }
3183
Philipp Reisner99432fc2011-05-20 16:39:13 +02003184 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3185 drbd_reconsider_max_bio_size(mdev);
3186
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187 if (get_ldev(mdev)) {
3188 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3189 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3190 ldsc = 1;
3191 }
3192
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193 put_ldev(mdev);
3194 }
3195
3196 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3197 if (be64_to_cpu(p->c_size) !=
3198 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3199 /* we have different sizes, probably peer
3200 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003201 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202 }
3203 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3204 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3205 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003206 mdev->state.disk >= D_INCONSISTENT) {
3207 if (ddsf & DDSF_NO_RESYNC)
3208 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3209 else
3210 resync_after_online_grow(mdev);
3211 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3213 }
3214 }
3215
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003216 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217}
3218
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003219static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3220 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003222 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003224 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003225
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3227
3228 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3229 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3230
3231 kfree(mdev->p_uuid);
3232 mdev->p_uuid = p_uuid;
3233
3234 if (mdev->state.conn < C_CONNECTED &&
3235 mdev->state.disk < D_INCONSISTENT &&
3236 mdev->state.role == R_PRIMARY &&
3237 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3238 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3239 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003240 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003241 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003242 }
3243
3244 if (get_ldev(mdev)) {
3245 int skip_initial_sync =
3246 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003247 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3249 (p_uuid[UI_FLAGS] & 8);
3250 if (skip_initial_sync) {
3251 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3252 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003253 "clear_n_write from receive_uuids",
3254 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003255 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3256 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3257 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3258 CS_VERBOSE, NULL);
3259 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003260 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003261 }
3262 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003263 } else if (mdev->state.disk < D_INCONSISTENT &&
3264 mdev->state.role == R_PRIMARY) {
3265 /* I am a diskless primary, the peer just created a new current UUID
3266 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003267 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003268 }
3269
3270 /* Before we test for the disk state, we should wait until an eventually
3271 ongoing cluster wide state change is finished. That is important if
3272 we are primary and are detaching from our disk. We need to see the
3273 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003274 mutex_lock(mdev->state_mutex);
3275 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003276 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003277 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3278
3279 if (updated_uuids)
3280 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003282 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003283}
3284
3285/**
3286 * convert_state() - Converts the peer's view of the cluster state to our point of view
3287 * @ps: The state as seen by the peer.
3288 */
3289static union drbd_state convert_state(union drbd_state ps)
3290{
3291 union drbd_state ms;
3292
3293 static enum drbd_conns c_tab[] = {
3294 [C_CONNECTED] = C_CONNECTED,
3295
3296 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3297 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3298 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3299 [C_VERIFY_S] = C_VERIFY_T,
3300 [C_MASK] = C_MASK,
3301 };
3302
3303 ms.i = ps.i;
3304
3305 ms.conn = c_tab[ps.conn];
3306 ms.peer = ps.role;
3307 ms.role = ps.peer;
3308 ms.pdsk = ps.disk;
3309 ms.disk = ps.pdsk;
3310 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3311
3312 return ms;
3313}
3314
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003315static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3316 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003317{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003318 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003320 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003321
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322 mask.i = be32_to_cpu(p->mask);
3323 val.i = be32_to_cpu(p->val);
3324
Philipp Reisner25703f82011-02-07 14:35:25 +01003325 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003326 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003327 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003328 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003329 }
3330
3331 mask = convert_state(mask);
3332 val = convert_state(val);
3333
Philipp Reisner047cd4a2011-02-15 11:09:33 +01003334 if (cmd == P_CONN_ST_CHG_REQ) {
3335 rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3336 conn_send_sr_reply(mdev->tconn, rv);
3337 } else {
3338 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3339 drbd_send_sr_reply(mdev, rv);
3340 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 drbd_md_sync(mdev);
3343
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003344 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345}
3346
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003347static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3348 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003349{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003350 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003351 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003352 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003353 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003354 int rv;
3355
Philipp Reisnerb411b362009-09-25 16:07:19 -07003356 peer_state.i = be32_to_cpu(p->state);
3357
3358 real_peer_disk = peer_state.disk;
3359 if (peer_state.disk == D_NEGOTIATING) {
3360 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3361 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3362 }
3363
Philipp Reisner87eeee42011-01-19 14:16:30 +01003364 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003365 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003366 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003367 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003369 /* peer says his disk is uptodate, while we think it is inconsistent,
3370 * and this happens while we think we have a sync going on. */
3371 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3372 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3373 /* If we are (becoming) SyncSource, but peer is still in sync
3374 * preparation, ignore its uptodate-ness to avoid flapping, it
3375 * will change to inconsistent once the peer reaches active
3376 * syncing states.
3377 * It may have changed syncer-paused flags, however, so we
3378 * cannot ignore this completely. */
3379 if (peer_state.conn > C_CONNECTED &&
3380 peer_state.conn < C_SYNC_SOURCE)
3381 real_peer_disk = D_INCONSISTENT;
3382
3383 /* if peer_state changes to connected at the same time,
3384 * it explicitly notifies us that it finished resync.
3385 * Maybe we should finish it up, too? */
3386 else if (os.conn >= C_SYNC_SOURCE &&
3387 peer_state.conn == C_CONNECTED) {
3388 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3389 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003390 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003391 }
3392 }
3393
3394 /* peer says his disk is inconsistent, while we think it is uptodate,
3395 * and this happens while the peer still thinks we have a sync going on,
3396 * but we think we are already done with the sync.
3397 * We ignore this to avoid flapping pdsk.
3398 * This should not happen, if the peer is a recent version of drbd. */
3399 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3400 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3401 real_peer_disk = D_UP_TO_DATE;
3402
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003403 if (ns.conn == C_WF_REPORT_PARAMS)
3404 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405
Philipp Reisner67531712010-10-27 12:21:30 +02003406 if (peer_state.conn == C_AHEAD)
3407 ns.conn = C_BEHIND;
3408
Philipp Reisnerb411b362009-09-25 16:07:19 -07003409 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3410 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3411 int cr; /* consider resync */
3412
3413 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003414 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 /* if we had an established connection
3416 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003417 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003418 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003419 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003420 /* if we have both been inconsistent, and the peer has been
3421 * forced to be UpToDate with --overwrite-data */
3422 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3423 /* if we had been plain connected, and the admin requested to
3424 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003425 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003426 (peer_state.conn >= C_STARTING_SYNC_S &&
3427 peer_state.conn <= C_WF_BITMAP_T));
3428
3429 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003430 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003431
3432 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003433 if (ns.conn == C_MASK) {
3434 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003435 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003436 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003437 } else if (peer_state.disk == D_NEGOTIATING) {
3438 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3439 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003440 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003441 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003442 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003443 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003444 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003445 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003446 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003447 }
3448 }
3449 }
3450
Philipp Reisner87eeee42011-01-19 14:16:30 +01003451 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003452 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453 goto retry;
3454 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455 ns.peer = peer_state.role;
3456 ns.pdsk = real_peer_disk;
3457 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003458 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003459 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003460 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3461 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003462 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003463 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003464 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003465 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003466 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003467 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003468 drbd_uuid_new_current(mdev);
3469 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003470 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003471 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003472 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003473 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003474 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003475 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003476
3477 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003478 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003479 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480 }
3481
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003482 if (os.conn > C_WF_REPORT_PARAMS) {
3483 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 peer_state.disk != D_NEGOTIATING ) {
3485 /* we want resync, peer has not yet decided to sync... */
3486 /* Nowadays only used when forcing a node into primary role and
3487 setting its disk to UpToDate with that */
3488 drbd_send_uuids(mdev);
3489 drbd_send_state(mdev);
3490 }
3491 }
3492
Philipp Reisner89e58e72011-01-19 13:12:45 +01003493 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003494
3495 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3496
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003497 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003498}
3499
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003500static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3501 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003502{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003503 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504
3505 wait_event(mdev->misc_wait,
3506 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003507 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 mdev->state.conn < C_CONNECTED ||
3509 mdev->state.disk < D_NEGOTIATING);
3510
3511 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3512
Philipp Reisnerb411b362009-09-25 16:07:19 -07003513 /* Here the _drbd_uuid_ functions are right, current should
3514 _not_ be rotated into the history */
3515 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3516 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3517 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3518
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003519 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003520 drbd_start_resync(mdev, C_SYNC_TARGET);
3521
3522 put_ldev(mdev);
3523 } else
3524 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3525
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003526 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527}
3528
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003529/**
3530 * receive_bitmap_plain
3531 *
3532 * Return 0 when done, 1 when another iteration is needed, and a negative error
3533 * code upon failure.
3534 */
3535static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003536receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3537 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538{
3539 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3540 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003541 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542
Philipp Reisner02918be2010-08-20 14:35:10 +02003543 if (want != data_size) {
3544 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003545 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546 }
3547 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003548 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003549 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003550 if (err != want) {
3551 if (err >= 0)
3552 err = -EIO;
3553 return err;
3554 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555
3556 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3557
3558 c->word_offset += num_words;
3559 c->bit_offset = c->word_offset * BITS_PER_LONG;
3560 if (c->bit_offset > c->bm_bits)
3561 c->bit_offset = c->bm_bits;
3562
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003563 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003564}
3565
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003566/**
3567 * recv_bm_rle_bits
3568 *
3569 * Return 0 when done, 1 when another iteration is needed, and a negative error
3570 * code upon failure.
3571 */
3572static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003573recv_bm_rle_bits(struct drbd_conf *mdev,
3574 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003575 struct bm_xfer_ctx *c,
3576 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003577{
3578 struct bitstream bs;
3579 u64 look_ahead;
3580 u64 rl;
3581 u64 tmp;
3582 unsigned long s = c->bit_offset;
3583 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584 int toggle = DCBP_get_start(p);
3585 int have;
3586 int bits;
3587
3588 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3589
3590 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3591 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003592 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593
3594 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3595 bits = vli_decode_bits(&rl, look_ahead);
3596 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003597 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598
3599 if (toggle) {
3600 e = s + rl -1;
3601 if (e >= c->bm_bits) {
3602 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003603 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604 }
3605 _drbd_bm_set_bits(mdev, s, e);
3606 }
3607
3608 if (have < bits) {
3609 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3610 have, bits, look_ahead,
3611 (unsigned int)(bs.cur.b - p->code),
3612 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003613 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003614 }
3615 look_ahead >>= bits;
3616 have -= bits;
3617
3618 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3619 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003620 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621 look_ahead |= tmp << have;
3622 have += bits;
3623 }
3624
3625 c->bit_offset = s;
3626 bm_xfer_ctx_bit_to_word_offset(c);
3627
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003628 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629}
3630
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003631/**
3632 * decode_bitmap_c
3633 *
3634 * Return 0 when done, 1 when another iteration is needed, and a negative error
3635 * code upon failure.
3636 */
3637static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003638decode_bitmap_c(struct drbd_conf *mdev,
3639 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003640 struct bm_xfer_ctx *c,
3641 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642{
3643 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003644 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645
3646 /* other variants had been implemented for evaluation,
3647 * but have been dropped as this one turned out to be "best"
3648 * during all our tests. */
3649
3650 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003651 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003652 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653}
3654
3655void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3656 const char *direction, struct bm_xfer_ctx *c)
3657{
3658 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003659 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003660 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3661 + c->bm_words * sizeof(long);
3662 unsigned total = c->bytes[0] + c->bytes[1];
3663 unsigned r;
3664
3665 /* total can not be zero. but just in case: */
3666 if (total == 0)
3667 return;
3668
3669 /* don't report if not compressed */
3670 if (total >= plain)
3671 return;
3672
3673 /* total < plain. check for overflow, still */
3674 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3675 : (1000 * total / plain);
3676
3677 if (r > 1000)
3678 r = 1000;
3679
3680 r = 1000 - r;
3681 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3682 "total %u; compression: %u.%u%%\n",
3683 direction,
3684 c->bytes[1], c->packets[1],
3685 c->bytes[0], c->packets[0],
3686 total, r/10, r % 10);
3687}
3688
3689/* Since we are processing the bitfield from lower addresses to higher,
3690 it does not matter if the process it in 32 bit chunks or 64 bit
3691 chunks as long as it is little endian. (Understand it as byte stream,
3692 beginning with the lowest byte...) If we would use big endian
3693 we would need to process it from the highest address to the lowest,
3694 in order to be agnostic to the 32 vs 64 bits issue.
3695
3696 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003697static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3698 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699{
3700 struct bm_xfer_ctx c;
3701 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003702 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003703 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003704 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003705 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003707 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3708 /* you are supposed to send additional out-of-sync information
3709 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710
3711 /* maybe we should use some per thread scratch page,
3712 * and allocate that during initial device creation? */
3713 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3714 if (!buffer) {
3715 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3716 goto out;
3717 }
3718
3719 c = (struct bm_xfer_ctx) {
3720 .bm_bits = drbd_bm_bits(mdev),
3721 .bm_words = drbd_bm_words(mdev),
3722 };
3723
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003724 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003725 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003726 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003727 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003728 /* MAYBE: sanity check that we speak proto >= 90,
3729 * and the feature is enabled! */
3730 struct p_compressed_bm *p;
3731
Philipp Reisner02918be2010-08-20 14:35:10 +02003732 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733 dev_err(DEV, "ReportCBitmap packet too large\n");
3734 goto out;
3735 }
3736 /* use the page buff */
3737 p = buffer;
3738 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003739 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003740 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003741 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3742 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003743 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003745 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003746 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003747 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 goto out;
3749 }
3750
Philipp Reisner02918be2010-08-20 14:35:10 +02003751 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003752 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003753
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003754 if (err <= 0) {
3755 if (err < 0)
3756 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003758 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003759 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003761 cmd = pi.cmd;
3762 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003763 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003764
3765 INFO_bm_xfer_stats(mdev, "receive", &c);
3766
3767 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003768 enum drbd_state_rv rv;
3769
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770 ok = !drbd_send_bitmap(mdev);
3771 if (!ok)
3772 goto out;
3773 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003774 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3775 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3777 /* admin may have requested C_DISCONNECTING,
3778 * other threads may have noticed network errors */
3779 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3780 drbd_conn_str(mdev->state.conn));
3781 }
3782
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003783 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003785 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003786 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3787 drbd_start_resync(mdev, C_SYNC_SOURCE);
3788 free_page((unsigned long) buffer);
3789 return ok;
3790}
3791
Philipp Reisner2de876e2011-03-15 14:38:01 +01003792static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003793{
3794 /* TODO zero copy sink :) */
3795 static char sink[128];
3796 int size, want, r;
3797
Philipp Reisner02918be2010-08-20 14:35:10 +02003798 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003799 while (size > 0) {
3800 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003801 r = drbd_recv(tconn, sink, want);
3802 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003803 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 size -= r;
3805 }
3806 return size == 0;
3807}
3808
Philipp Reisner2de876e2011-03-15 14:38:01 +01003809static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3810 unsigned int data_size)
3811{
3812 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3813 cmd, data_size);
3814
3815 return _tconn_receive_skip(mdev->tconn, data_size);
3816}
3817
3818static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3819{
3820 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3821 cmd, data_size);
3822
3823 return _tconn_receive_skip(tconn, data_size);
3824}
3825
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003826static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3827 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003828{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 /* Make sure we've acked all the TCP data associated
3830 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003831 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003833 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834}
3835
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003836static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3837 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003838{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003839 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003840
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003841 switch (mdev->state.conn) {
3842 case C_WF_SYNC_UUID:
3843 case C_WF_BITMAP_T:
3844 case C_BEHIND:
3845 break;
3846 default:
3847 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3848 drbd_conn_str(mdev->state.conn));
3849 }
3850
Philipp Reisner73a01a12010-10-27 14:33:00 +02003851 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3852
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003853 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003854}
3855
Philipp Reisner02918be2010-08-20 14:35:10 +02003856struct data_cmd {
3857 int expect_payload;
3858 size_t pkt_size;
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003859 enum {
3860 MDEV,
3861 CONN,
3862 } type;
3863 union {
3864 int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
3865 unsigned int to_receive);
3866 int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
3867 unsigned int to_receive);
3868 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869};
3870
Philipp Reisner02918be2010-08-20 14:35:10 +02003871static struct data_cmd drbd_cmd_handler[] = {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003872 [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
3873 [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
3874 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
3875 [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
3876 [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3877 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3878 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
3879 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3880 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3881 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3882 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3883 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), MDEV, { receive_protocol } },
3884 [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
3885 [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
3886 [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } },
3887 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
3888 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
3889 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3890 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3891 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3892 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
3893 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
3894 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
Philipp Reisner02918be2010-08-20 14:35:10 +02003895};
3896
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003904static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003906 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003907 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003908 size_t shs; /* sub header size */
3909 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003911 while (get_t_state(&tconn->receiver) == RUNNING) {
3912 drbd_thread_current_set_cpu(&tconn->receiver);
3913 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003914 goto err_out;
3915
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003916 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003917 !drbd_cmd_handler[pi.cmd].mdev_fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003918 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003919 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003920 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003921
Philipp Reisner77351055b2011-02-07 17:24:26 +01003922 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3923 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003924 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003925 goto err_out;
3926 }
3927
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003928 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003929 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003930 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003931 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003932 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003933 goto err_out;
3934 }
3935 }
3936
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003937 if (drbd_cmd_handler[pi.cmd].type == CONN) {
3938 rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
3939 } else {
3940 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
3941 rv = mdev ?
3942 drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
3943 tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
3944 }
Philipp Reisner02918be2010-08-20 14:35:10 +02003945
3946 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003947 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003948 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003949 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003950 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003952
Philipp Reisner02918be2010-08-20 14:35:10 +02003953 if (0) {
3954 err_out:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003955 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003956 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003957}
3958
Philipp Reisner0e29d162011-02-18 14:23:11 +01003959void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003960{
3961 struct drbd_wq_barrier barr;
3962
3963 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01003964 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01003966 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003967 wait_for_completion(&barr.done);
3968}
3969
Philipp Reisner360cc742011-02-08 14:29:53 +01003970static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003971{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003972 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003974
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003975 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003976 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977
3978 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01003979 drbd_thread_stop(&tconn->asender);
3980 drbd_free_sock(tconn);
3981
3982 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
3983
3984 conn_info(tconn, "Connection closed\n");
3985
3986 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003987 oc = tconn->cstate;
3988 if (oc >= C_UNCONNECTED)
3989 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
3990
Philipp Reisner360cc742011-02-08 14:29:53 +01003991 spin_unlock_irq(&tconn->req_lock);
3992
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003993 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01003994 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
3995
3996 crypto_free_hash(tconn->cram_hmac_tfm);
3997 tconn->cram_hmac_tfm = NULL;
3998
3999 kfree(tconn->net_conf);
4000 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004001 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004002 }
4003}
4004
4005static int drbd_disconnected(int vnr, void *p, void *data)
4006{
4007 struct drbd_conf *mdev = (struct drbd_conf *)p;
4008 enum drbd_fencing_p fp;
4009 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004010
Philipp Reisner85719572010-07-21 10:20:17 +02004011 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004012 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4014 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4015 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004016 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017
4018 /* We do not have data structures that would allow us to
4019 * get the rs_pending_cnt down to 0 again.
4020 * * On C_SYNC_TARGET we do not have any data structures describing
4021 * the pending RSDataRequest's we have sent.
4022 * * On C_SYNC_SOURCE there is no data structure that tracks
4023 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4024 * And no, it is not the sum of the reference counts in the
4025 * resync_LRU. The resync_LRU tracks the whole operation including
4026 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4027 * on the fly. */
4028 drbd_rs_cancel_all(mdev);
4029 mdev->rs_total = 0;
4030 mdev->rs_failed = 0;
4031 atomic_set(&mdev->rs_pending_cnt, 0);
4032 wake_up(&mdev->misc_wait);
4033
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004034 del_timer(&mdev->request_timer);
4035
Philipp Reisnerb411b362009-09-25 16:07:19 -07004036 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037 resync_timer_fn((unsigned long)mdev);
4038
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4040 * w_make_resync_request etc. which may still be on the worker queue
4041 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004042 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043
4044 /* This also does reclaim_net_ee(). If we do this too early, we might
4045 * miss some resync ee and pages.*/
4046 drbd_process_done_ee(mdev);
4047
4048 kfree(mdev->p_uuid);
4049 mdev->p_uuid = NULL;
4050
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004051 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004052 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054 drbd_md_sync(mdev);
4055
4056 fp = FP_DONT_CARE;
4057 if (get_ldev(mdev)) {
4058 fp = mdev->ldev->dc.fencing;
4059 put_ldev(mdev);
4060 }
4061
Philipp Reisner87f7be42010-06-11 13:56:33 +02004062 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
4063 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004064
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004065 /* serialize with bitmap writeout triggered by the state change,
4066 * if any. */
4067 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4068
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069 /* tcp_close and release of sendpage pages can be deferred. I don't
4070 * want to use SO_LINGER, because apparently it can be deferred for
4071 * more than 20 seconds (longest time I checked).
4072 *
4073 * Actually we don't care for exactly when the network stack does its
4074 * put_page(), but release our reference on these pages right here.
4075 */
4076 i = drbd_release_ee(mdev, &mdev->net_ee);
4077 if (i)
4078 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004079 i = atomic_read(&mdev->pp_in_use_by_net);
4080 if (i)
4081 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082 i = atomic_read(&mdev->pp_in_use);
4083 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004084 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085
4086 D_ASSERT(list_empty(&mdev->read_ee));
4087 D_ASSERT(list_empty(&mdev->active_ee));
4088 D_ASSERT(list_empty(&mdev->sync_ee));
4089 D_ASSERT(list_empty(&mdev->done_ee));
4090
4091 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4092 atomic_set(&mdev->current_epoch->epoch_size, 0);
4093 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004094
4095 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096}
4097
4098/*
4099 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4100 * we can agree on is stored in agreed_pro_version.
4101 *
4102 * feature flags and the reserved array should be enough room for future
4103 * enhancements of the handshake protocol, and possible plugins...
4104 *
4105 * for now, they are expected to be zero, but ignored.
4106 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004107static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004109 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004110 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 int ok;
4112
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004113 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4114 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115 return 0; /* interrupted. not ok. */
4116 }
4117
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004118 if (tconn->data.socket == NULL) {
4119 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120 return 0;
4121 }
4122
4123 memset(p, 0, sizeof(*p));
4124 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4125 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004126 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
4127 &p->head, sizeof(*p), 0);
4128 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 return ok;
4130}
4131
4132/*
4133 * return values:
4134 * 1 yes, we have a valid connection
4135 * 0 oops, did not work out, please try again
4136 * -1 peer talks different language,
4137 * no point in trying again, please go standalone.
4138 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004139static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004141 /* ASSERT current == tconn->receiver ... */
4142 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004143 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004144 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004145 int rv;
4146
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004147 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148 if (!rv)
4149 return 0;
4150
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004151 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004152 if (!rv)
4153 return 0;
4154
Philipp Reisner77351055b2011-02-07 17:24:26 +01004155 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004156 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004157 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158 return -1;
4159 }
4160
Philipp Reisner77351055b2011-02-07 17:24:26 +01004161 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004162 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004163 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164 return -1;
4165 }
4166
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004167 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168
4169 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004170 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004171 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172 return 0;
4173 }
4174
Philipp Reisnerb411b362009-09-25 16:07:19 -07004175 p->protocol_min = be32_to_cpu(p->protocol_min);
4176 p->protocol_max = be32_to_cpu(p->protocol_max);
4177 if (p->protocol_max == 0)
4178 p->protocol_max = p->protocol_min;
4179
4180 if (PRO_VERSION_MAX < p->protocol_min ||
4181 PRO_VERSION_MIN > p->protocol_max)
4182 goto incompat;
4183
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004184 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004186 conn_info(tconn, "Handshake successful: "
4187 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188
4189 return 1;
4190
4191 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004192 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193 "I support %d-%d, peer supports %d-%d\n",
4194 PRO_VERSION_MIN, PRO_VERSION_MAX,
4195 p->protocol_min, p->protocol_max);
4196 return -1;
4197}
4198
4199#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Variant used when the kernel has no HMAC support: authentication can
 * never succeed, so log why and return -1 (auth failed, don't try again).
 *
 * Logs through conn_err(): this function only has the connection, no
 * mdev, so the per-device dev_err(DEV, ...) form cannot be used here.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4206#else
4207#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004208
4209/* Return value:
4210 1 - auth succeeded,
4211 0 - failed, try again (network error),
4212 -1 - auth failed, don't try again.
4213*/
4214
Philipp Reisner13e60372011-02-08 09:54:40 +01004215static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216{
4217 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4218 struct scatterlist sg;
4219 char *response = NULL;
4220 char *right_response = NULL;
4221 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004222 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004223 unsigned int resp_size;
4224 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004225 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004226 int rv;
4227
Philipp Reisner13e60372011-02-08 09:54:40 +01004228 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229 desc.flags = 0;
4230
Philipp Reisner13e60372011-02-08 09:54:40 +01004231 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4232 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004234 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004235 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236 goto fail;
4237 }
4238
4239 get_random_bytes(my_challenge, CHALLENGE_LEN);
4240
Philipp Reisner13e60372011-02-08 09:54:40 +01004241 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242 if (!rv)
4243 goto fail;
4244
Philipp Reisner13e60372011-02-08 09:54:40 +01004245 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004246 if (!rv)
4247 goto fail;
4248
Philipp Reisner77351055b2011-02-07 17:24:26 +01004249 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004250 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004251 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252 rv = 0;
4253 goto fail;
4254 }
4255
Philipp Reisner77351055b2011-02-07 17:24:26 +01004256 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004257 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004258 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004259 goto fail;
4260 }
4261
Philipp Reisner77351055b2011-02-07 17:24:26 +01004262 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004264 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004265 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266 goto fail;
4267 }
4268
Philipp Reisner13e60372011-02-08 09:54:40 +01004269 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270
Philipp Reisner77351055b2011-02-07 17:24:26 +01004271 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004272 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004273 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274 rv = 0;
4275 goto fail;
4276 }
4277
Philipp Reisner13e60372011-02-08 09:54:40 +01004278 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279 response = kmalloc(resp_size, GFP_NOIO);
4280 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004281 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004282 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 goto fail;
4284 }
4285
4286 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004287 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288
4289 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4290 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004291 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004292 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293 goto fail;
4294 }
4295
Philipp Reisner13e60372011-02-08 09:54:40 +01004296 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 if (!rv)
4298 goto fail;
4299
Philipp Reisner13e60372011-02-08 09:54:40 +01004300 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301 if (!rv)
4302 goto fail;
4303
Philipp Reisner77351055b2011-02-07 17:24:26 +01004304 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004305 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004306 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004307 rv = 0;
4308 goto fail;
4309 }
4310
Philipp Reisner77351055b2011-02-07 17:24:26 +01004311 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004312 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313 rv = 0;
4314 goto fail;
4315 }
4316
Philipp Reisner13e60372011-02-08 09:54:40 +01004317 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318
4319 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004320 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004321 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 rv = 0;
4323 goto fail;
4324 }
4325
4326 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004327 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004328 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004329 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330 goto fail;
4331 }
4332
4333 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4334
4335 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4336 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004337 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004338 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339 goto fail;
4340 }
4341
4342 rv = !memcmp(response, right_response, resp_size);
4343
4344 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004345 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4346 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004347 else
4348 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004349
4350 fail:
4351 kfree(peers_ch);
4352 kfree(response);
4353 kfree(right_response);
4354
4355 return rv;
4356}
4357#endif
4358
4359int drbdd_init(struct drbd_thread *thi)
4360{
Philipp Reisner392c8802011-02-09 10:33:31 +01004361 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362 int h;
4363
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004364 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365
4366 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004367 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004369 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004370 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371 }
4372 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004373 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004374 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 }
4376 } while (h == 0);
4377
4378 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004379 if (get_net_conf(tconn)) {
4380 drbdd(tconn);
4381 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 }
4383 }
4384
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004385 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004386
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004387 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004388 return 0;
4389}
4390
4391/* ********* acknowledge sender ******** */
4392
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004393static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004394{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004395 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004396 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397
4398 int retcode = be32_to_cpu(p->retcode);
4399
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004400 if (cmd == P_STATE_CHG_REPLY) {
4401 if (retcode >= SS_SUCCESS) {
4402 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4403 } else {
4404 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4405 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4406 drbd_set_st_err_str(retcode), retcode);
4407 }
4408 wake_up(&mdev->state_wait);
4409 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4410 if (retcode >= SS_SUCCESS) {
4411 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4412 } else {
4413 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4414 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4415 drbd_set_st_err_str(retcode), retcode);
4416 }
4417 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418 }
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004419 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420}
4421
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004422static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004424 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004425
4426}
4427
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004428static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004430 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004432 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4433 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4434 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004436 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004437}
4438
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004439static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004440{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004441 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442 sector_t sector = be64_to_cpu(p->sector);
4443 int blksize = be32_to_cpu(p->blksize);
4444
Philipp Reisner31890f42011-01-19 14:12:51 +01004445 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004446
4447 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4448
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004449 if (get_ldev(mdev)) {
4450 drbd_rs_complete_io(mdev, sector);
4451 drbd_set_in_sync(mdev, sector, blksize);
4452 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4453 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4454 put_ldev(mdev);
4455 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004456 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004457 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004458
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004459 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460}
4461
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004462static int
4463validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4464 struct rb_root *root, const char *func,
4465 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004466{
4467 struct drbd_request *req;
4468 struct bio_and_error m;
4469
Philipp Reisner87eeee42011-01-19 14:16:30 +01004470 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004471 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004472 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004473 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004474 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004475 }
4476 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004477 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478
4479 if (m.bio)
4480 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004481 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482}
4483
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004484static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004485{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004486 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487 sector_t sector = be64_to_cpu(p->sector);
4488 int blksize = be32_to_cpu(p->blksize);
4489 enum drbd_req_event what;
4490
4491 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4492
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004493 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494 drbd_set_in_sync(mdev, sector, blksize);
4495 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004496 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004497 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004498 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004500 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004501 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502 break;
4503 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004504 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004505 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004506 break;
4507 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004508 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004509 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004510 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004511 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004512 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004513 what = DISCARD_WRITE;
4514 break;
4515 case P_RETRY_WRITE:
4516 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4517 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518 break;
4519 default:
4520 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004521 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004522 }
4523
4524 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004525 &mdev->write_requests, __func__,
4526 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527}
4528
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004529static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004530{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004531 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004532 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004533 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004534 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4535 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004536 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537
4538 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4539
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004540 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004541 dec_rs_pending(mdev);
4542 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004543 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004544 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004545
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004546 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004547 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004548 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004549 if (!found) {
4550 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4551 The master bio might already be completed, therefore the
4552 request is no longer in the collision hash. */
4553 /* In Protocol B we might already have got a P_RECV_ACK
4554 but then get a P_NEG_ACK afterwards. */
4555 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004556 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004557 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004558 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004559 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004560}
4561
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004562static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004564 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004565 sector_t sector = be64_to_cpu(p->sector);
4566
4567 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004568
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4570 (unsigned long long)sector, be32_to_cpu(p->blksize));
4571
4572 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004573 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004574 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575}
4576
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004577static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578{
4579 sector_t sector;
4580 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004581 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582
4583 sector = be64_to_cpu(p->sector);
4584 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585
4586 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4587
4588 dec_rs_pending(mdev);
4589
4590 if (get_ldev_if_state(mdev, D_FAILED)) {
4591 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004592 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004593 case P_NEG_RS_DREPLY:
4594 drbd_rs_failed_io(mdev, sector, size);
4595 case P_RS_CANCEL:
4596 break;
4597 default:
4598 D_ASSERT(0);
4599 put_ldev(mdev);
4600 return false;
4601 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602 put_ldev(mdev);
4603 }
4604
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004605 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004606}
4607
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004608static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004609{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004610 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004611
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004612 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004614 if (mdev->state.conn == C_AHEAD &&
4615 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004616 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4617 mdev->start_resync_timer.expires = jiffies + HZ;
4618 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004619 }
4620
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004621 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622}
4623
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004624static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004626 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004627 struct drbd_work *w;
4628 sector_t sector;
4629 int size;
4630
4631 sector = be64_to_cpu(p->sector);
4632 size = be32_to_cpu(p->blksize);
4633
4634 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4635
4636 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4637 drbd_ov_oos_found(mdev, sector, size);
4638 else
4639 ov_oos_print(mdev);
4640
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004641 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004642 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004643
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644 drbd_rs_complete_io(mdev, sector);
4645 dec_rs_pending(mdev);
4646
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004647 --mdev->ov_left;
4648
4649 /* let's advance progress step marks only for every other megabyte */
4650 if ((mdev->ov_left & 0x200) == 0x200)
4651 drbd_advance_rs_marks(mdev, mdev->ov_left);
4652
4653 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654 w = kmalloc(sizeof(*w), GFP_NOIO);
4655 if (w) {
4656 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004657 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004658 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004659 } else {
4660 dev_err(DEV, "kmalloc(w) failed.");
4661 ov_oos_print(mdev);
4662 drbd_resync_finished(mdev);
4663 }
4664 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004665 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004666 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667}
4668
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004669static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004670{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004671 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004672}
4673
Philipp Reisner32862ec2011-02-08 16:41:01 +01004674static int tconn_process_done_ee(struct drbd_tconn *tconn)
4675{
Philipp Reisner082a3432011-03-15 16:05:42 +01004676 struct drbd_conf *mdev;
4677 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004678
4679 do {
4680 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4681 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004682 idr_for_each_entry(&tconn->volumes, mdev, i) {
4683 if (!drbd_process_done_ee(mdev))
4684 return 1; /* error */
4685 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004686 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004687
4688 spin_lock_irq(&tconn->req_lock);
4689 idr_for_each_entry(&tconn->volumes, mdev, i) {
4690 not_empty = !list_empty(&mdev->done_ee);
4691 if (not_empty)
4692 break;
4693 }
4694 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004695 } while (not_empty);
4696
4697 return 0;
4698}
4699
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004700struct asender_cmd {
4701 size_t pkt_size;
4702 int (*process)(struct drbd_conf *, enum drbd_packet);
4703};
4704
4705static struct asender_cmd asender_tbl[] = {
4706 [P_PING] = { sizeof(struct p_header), got_Ping },
4707 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
4708 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4709 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4710 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4711 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
4712 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4713 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4714 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4715 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4716 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4717 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4718 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4719 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4720 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
4721 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
4722 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
4723};
4724
Philipp Reisnerb411b362009-09-25 16:07:19 -07004725int drbd_asender(struct drbd_thread *thi)
4726{
Philipp Reisner392c8802011-02-09 10:33:31 +01004727 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004728 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004730 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004731 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732 void *buf = h;
4733 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004734 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004735 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004736
Philipp Reisnerb411b362009-09-25 16:07:19 -07004737 current->policy = SCHED_RR; /* Make this a realtime task! */
4738 current->rt_priority = 2; /* more important than all other tasks */
4739
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004740 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004741 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004742 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004743 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004744 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004745 goto reconnect;
4746 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004747 tconn->meta.socket->sk->sk_rcvtimeo =
4748 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004749 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004750 }
4751
Philipp Reisner32862ec2011-02-08 16:41:01 +01004752 /* TODO: conditionally cork; it may hurt latency if we cork without
4753 much to send */
4754 if (!tconn->net_conf->no_cork)
4755 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004756 if (tconn_process_done_ee(tconn)) {
4757 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004758 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004759 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004760 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004761 if (!tconn->net_conf->no_cork)
4762 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004763
4764 /* short circuit, recv_msg would return EINTR anyways. */
4765 if (signal_pending(current))
4766 continue;
4767
Philipp Reisner32862ec2011-02-08 16:41:01 +01004768 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4769 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004770
4771 flush_signals(current);
4772
4773 /* Note:
4774 * -EINTR (on meta) we got a signal
4775 * -EAGAIN (on meta) rcvtimeo expired
4776 * -ECONNRESET other side closed the connection
4777 * -ERESTARTSYS (on data) we got a signal
4778 * rv < 0 other than above: unexpected error!
4779 * rv == expected: full header or command
4780 * rv < expected: "woken" by signal during receive
4781 * rv == 0 : "connection shut down by peer"
4782 */
4783 if (likely(rv > 0)) {
4784 received += rv;
4785 buf += rv;
4786 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004787 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004788 goto reconnect;
4789 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004790 /* If the data socket received something meanwhile,
4791 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004792 if (time_after(tconn->last_received,
4793 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004794 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004795 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004796 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797 goto reconnect;
4798 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004799 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004800 continue;
4801 } else if (rv == -EINTR) {
4802 continue;
4803 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004804 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004805 goto reconnect;
4806 }
4807
4808 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004809 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004810 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004811 cmd = &asender_tbl[pi.cmd];
4812 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004813 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004814 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004815 goto disconnect;
4816 }
4817 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004818 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004819 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004820 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004821 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004822 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004823 }
4824 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004825 tconn->last_received = jiffies;
4826 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004827 goto reconnect;
4828
Lars Ellenbergf36af182011-03-09 22:44:55 +01004829 /* the idle_timeout (ping-int)
4830 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004831 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004832 ping_timeout_active = 0;
4833
Philipp Reisnerb411b362009-09-25 16:07:19 -07004834 buf = h;
4835 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004836 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004837 cmd = NULL;
4838 }
4839 }
4840
4841 if (0) {
4842reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004843 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004844 }
4845 if (0) {
4846disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004847 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004849 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004850
Philipp Reisner32862ec2011-02-08 16:41:01 +01004851 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852
4853 return 0;
4854}