/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */
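
/*
 * For illustration: with page->private as the "next" pointer, a chain of
 * three pages hanging off *head looks like
 *
 *	*head -> pageA -> pageB -> pageC -> 0
 *
 * where each arrow is the struct page * stored via set_page_private(), and
 * the trailing 0 is the end-of-chain marker that page_chain_del() puts on
 * the last page it hands out.
 */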

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that is not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
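
/*
 * To summarize the accounting: drbd_pp_alloc() adds the full @number to
 * pp_in_use, while drbd_pp_free() subtracts the actual chain length from
 * pp_in_use or pp_in_use_by_net, depending on @is_net, and warns if a
 * counter would go negative.  Freed chains are cached in drbd_pp_pool only
 * while drbd_pp_vacant stays below roughly
 * (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; beyond that,
 * page_chain_free() returns the pages to the system.
 */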

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
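
/*
 * A minimal usage sketch of the rule above: callers that already hold the
 * req_lock use the underscore variant, e.g.
 *
 *	spin_lock_irq(&mdev->tconn->req_lock);
 *	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
 *	spin_unlock_irq(&mdev->tconn->req_lock);
 *
 * while drbd_wait_ee_list_empty() takes and releases the lock itself, as its
 * definition further below shows.
 */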

struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}


/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* See also kernel_accept(), which is only present since 2.6.18.
 * Also, we want to log exactly which part of it failed. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;

out:
	return err;
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
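
/*
 * The get_fs()/set_fs(KERNEL_DS) bracket above (and in drbd_recv() below)
 * temporarily lifts the user/kernel address space check, so sock_recvmsg()
 * can be handed an iovec that points into kernel memory rather than into a
 * userspace buffer.
 */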

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}

static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
				  (struct sockaddr *) tconn->net_conf->my_addr,
				  tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}

static int drbd_send_fp(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = tconn->data.sbuf;

	return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}

static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 h;
	int rr;

	rr = drbd_recv_short(sock, &h, sizeof(h), 0);

	if (rr == sizeof(h) && h.magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h.command);

	return 0xffff;
}
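
/*
 * The "first packet" exchanged on a freshly established TCP connection is a
 * bare p_header80 carrying only DRBD_MAGIC and a command of P_INITIAL_DATA
 * or P_INITIAL_META; drbd_connect() below uses the command returned by
 * drbd_recv_fp() to decide whether the peer wants this connection to become
 * the data or the meta socket.
 */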

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);

	/* Assume that the peer only understands protocol 80 until we know better. */
	tconn->agreed_pro_version = 80;

	do {
		struct socket *s;

		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!tconn->data.socket) {
				tconn->data.socket = s;
				drbd_send_fp(tconn, &tconn->data, P_INITIAL_DATA);
			} else if (!tconn->meta.socket) {
				tconn->meta.socket = s;
				drbd_send_fp(tconn, &tconn->meta, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (tconn->data.socket && tconn->meta.socket) {
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&tconn->data.socket);
			ok = drbd_socket_okay(&tconn->meta.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&tconn->data.socket);
			drbd_socket_okay(&tconn->meta.socket);
			switch (try) {
			case P_INITIAL_DATA:
				if (tconn->data.socket) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(tconn->data.socket);
				}
				tconn->data.socket = s;
				break;
			case P_INITIAL_META:
				if (tconn->meta.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(tconn->meta.socket);
				}
				tconn->meta.socket = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (tconn->data.socket && tconn->meta.socket) {
			ok = drbd_socket_okay(&tconn->data.socket);
			ok = drbd_socket_okay(&tconn->meta.socket) && ok;
			if (ok)
				break;
		}
	} while (1);

	sock = tconn->data.socket;
	msock = tconn->meta.socket;

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->last_received = jiffies;

	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (tconn->data.socket) {
		sock_release(tconn->data.socket);
		tconn->data.socket = NULL;
	}
	if (tconn->meta.socket) {
		sock_release(tconn->meta.socket);
		tconn->meta.socket = NULL;
	}
	return -1;
}
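
/*
 * Of the two sockets negotiated above, "sock" (tconn->data) is the one meant
 * to carry the bulk replication traffic, while "msock" (tconn->meta) carries
 * the small control packets such as acks and pings; that is why msock gets
 * the ping_int receive timeout and the higher TC_PRIO_INTERACTIVE priority.
 */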

static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		pi->cmd = be16_to_cpu(h->h80.command);
		pi->size = be16_to_cpu(h->h80.length);
		pi->vnr = 0;
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		pi->cmd = be16_to_cpu(h->h95.command);
		pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
		pi->vnr = 0;
	} else {
		conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
			 be32_to_cpu(h->h80.magic),
			 be16_to_cpu(h->h80.command),
			 be16_to_cpu(h->h80.length));
		return -EINVAL;
	}
	return 0;
}
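
/*
 * decode_header() accepts two on-the-wire header layouts: the h80 format,
 * recognized by the 32-bit DRBD_MAGIC and carrying a 16-bit length, and the
 * h95 format, recognized by the 16-bit DRBD_MAGIC_BIG and carrying a 24-bit
 * length (hence the 0x00ffffff mask).  Both decode into the same
 * struct packet_info.
 */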

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_header *h = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, h, sizeof(*h));
	if (err)
		return err;

	err = decode_header(tconn, h, pi);
	tconn->last_received = jiffies;

	return err;
}

static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
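
/*
 * In short: an epoch is finished once it has a barrier number, a non-zero
 * epoch_size and no more active requests.  drbd_may_finish_epoch() then
 * sends the barrier ack (unless EV_CLEANUP is set) and either destroys the
 * epoch (FE_DESTROYED) or, if it is still the current one, resets it for
 * reuse (FE_RECYCLED).
 */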

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}
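
/*
 * The write ordering methods are ranked WO_none < WO_drain_io <
 * WO_bdev_flush, so the min() in drbd_bump_write_ordering() means the
 * method can only ever be downgraded at runtime, further constrained by the
 * no_disk_flush and no_disk_drain settings of the backing device.
 */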

/**
 * drbd_submit_peer_request()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&mdev->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	int rv;
	struct p_barrier *p = tconn->data.rbuf;
	struct drbd_epoch *epoch;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return 0;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return 0;
}

1308/* used from receive_RSDataReply (recv_resync_read)
1309 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001310static struct drbd_peer_request *
1311read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1312 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313{
Lars Ellenberg66660322010-04-06 12:15:04 +02001314 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001315 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001317 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001318 void *dig_in = mdev->tconn->int_dig_in;
1319 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001320 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321
Philipp Reisnera0638452011-01-19 14:31:32 +01001322 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1323 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001324
1325 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001326 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1327 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001328 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001329 }
1330
1331 data_size -= dgs;
1332
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001333 if (!expect(data_size != 0))
1334 return NULL;
1335 if (!expect(IS_ALIGNED(data_size, 512)))
1336 return NULL;
1337 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1338 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001339
Lars Ellenberg66660322010-04-06 12:15:04 +02001340 /* even though we trust our peer,
1341 * we sometimes have to double check. */
1342 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001343 dev_err(DEV, "request from peer beyond end of local disk: "
1344 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001345 (unsigned long long)capacity,
1346 (unsigned long long)sector, data_size);
1347 return NULL;
1348 }
1349
Philipp Reisnerb411b362009-09-25 16:07:19 -07001350 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1351 * "criss-cross" setup, that might cause write-out on some other DRBD,
1352 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001353 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1354 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001356
Philipp Reisnerb411b362009-09-25 16:07:19 -07001357 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001358 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001359 page_chain_for_each(page) {
1360 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001361 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001362 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001363 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001364 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1365 data[0] = data[0] ^ (unsigned long)-1;
1366 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001367 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001368 if (err) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001369 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001370 return NULL;
1371 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001372 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 }
1374
1375 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001376 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001377 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001378 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1379 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001380 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381 return NULL;
1382 }
1383 }
1384 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001385 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001386}
1387
1388/* drbd_drain_block() just takes a data block
1389 * out of the socket input buffer, and discards it.
1390 */
1391static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1392{
1393 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001394 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395 void *data;
1396
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001397 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001398 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001399
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001400 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401
1402 data = kmap(page);
1403 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001404 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1405
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001406 err = drbd_recv_all_warn(mdev->tconn, data, len);
1407 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001409 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001410 }
1411 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001412 drbd_pp_free(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001413 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001414}
1415
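/* Receive the payload of a data reply directly into the master bio of the
 * corresponding ("disk-less") read request, verifying the data digest if one
 * is in use. */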
1416static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1417 sector_t sector, int data_size)
1418{
1419 struct bio_vec *bvec;
1420 struct bio *bio;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001421 int dgs, err, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001422 void *dig_in = mdev->tconn->int_dig_in;
1423 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001424
Philipp Reisnera0638452011-01-19 14:31:32 +01001425 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1426 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001427
1428 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001429 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1430 if (err)
1431 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001432 }
1433
1434 data_size -= dgs;
1435
1436 /* optimistically update recv_cnt. if receiving fails below,
1437 * we disconnect anyways, and counters will be reset. */
1438 mdev->recv_cnt += data_size>>9;
1439
1440 bio = req->master_bio;
1441 D_ASSERT(sector == bio->bi_sector);
1442
1443 bio_for_each_segment(bvec, bio, i) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001444 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445 expect = min_t(int, data_size, bvec->bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001446 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447 kunmap(bvec->bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001448 if (err)
1449 return err;
1450 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451 }
1452
1453 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001454 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455 if (memcmp(dig_in, dig_vv, dgs)) {
1456 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001457 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458 }
1459 }
1460
1461 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001462 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463}
1464
1465/* e_end_resync_block() is called via
1466 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001467static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001469 struct drbd_peer_request *peer_req =
1470 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001471 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001472 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001473 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001475 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001477 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1478 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001479 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480 } else {
1481 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001482 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001484 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485 }
1486 dec_unacked(mdev);
1487
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001488 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001489}
1490
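/* Read one resync block from the socket and submit it as a local write.
 * The (positive or negative) ack is sent from e_end_resync_block() once the
 * write has completed. */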
1491static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1492{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001495 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1496 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001497 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498
1499 dec_rs_pending(mdev);
1500
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501 inc_unacked(mdev);
 1502 /* corresponding dec_unacked() in e_end_resync_block(),
 1503 * or in _drbd_clear_done_ee(), respectively */
1504
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001505 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001506
Philipp Reisner87eeee42011-01-19 14:16:30 +01001507 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001508 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001509 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001511 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001512 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001513 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001515 /* don't care for the reason here */
1516 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001517 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001518 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001519 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001520
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001521 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001522fail:
1523 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001524 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525}
1526
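/* The peer echoes our request pointer back as the block_id; before using it,
 * verify that it really is a request registered in the given interval tree at
 * that sector. */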
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001527static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001528find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1529 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001530{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001531 struct drbd_request *req;
1532
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001533 /* Request object according to our peer */
1534 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001535 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001536 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001537 if (!missing_ok) {
1538 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1539 (unsigned long)id, (unsigned long long)sector);
1540 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001541 return NULL;
1542}
1543
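/* P_DATA_REPLY: the peer returns the data for a read request that we could
 * not serve from our local disk. */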
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001544static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001546 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 struct drbd_request *req;
1548 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001549 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001550 struct p_data *p = tconn->data.rbuf;
1551
1552 mdev = vnr_to_mdev(tconn, pi->vnr);
1553 if (!mdev)
1554 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555
1556 sector = be64_to_cpu(p->sector);
1557
Philipp Reisner87eeee42011-01-19 14:16:30 +01001558 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001559 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001560 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001561 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001562 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563
Bart Van Assche24c48302011-05-21 18:32:29 +02001564 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 * special casing it there for the various failure cases.
1566 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001567 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001568 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001569 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570 /* else: nothing. handled from drbd_disconnect...
1571 * I don't think we may complete this just yet
1572 * in case we are "on-disconnect: freeze" */
1573
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001574 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575}
1576
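/* P_RS_DATA_REPLY: resync data we requested; write it to the local disk, or
 * drain it and send a negative ack if the local disk is gone. */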
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001577static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001579 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001581 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001582 struct p_data *p = tconn->data.rbuf;
1583
1584 mdev = vnr_to_mdev(tconn, pi->vnr);
1585 if (!mdev)
1586 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587
1588 sector = be64_to_cpu(p->sector);
1589 D_ASSERT(p->block_id == ID_SYNCER);
1590
1591 if (get_ldev(mdev)) {
1592 /* data is submitted to disk within recv_resync_read.
1593 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001594 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001595 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 } else {
1597 if (__ratelimit(&drbd_ratelimit_state))
1598 dev_err(DEV, "Can not write resync data to local disk.\n");
1599
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001600 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001602 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603 }
1604
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001605 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001606
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001607 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608}
1609
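/* Worker callback: resubmit a postponed local write, reusing its original
 * master bio and start time. */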
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001610static int w_restart_write(struct drbd_work *w, int cancel)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001611{
1612 struct drbd_request *req = container_of(w, struct drbd_request, w);
1613 struct drbd_conf *mdev = w->mdev;
1614 struct bio *bio;
1615 unsigned long start_time;
1616 unsigned long flags;
1617
1618 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1619 if (!expect(req->rq_state & RQ_POSTPONED)) {
1620 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001621 return -EIO;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001622 }
1623 bio = req->master_bio;
1624 start_time = req->start_time;
1625 /* Postponed requests will not have their master_bio completed! */
1626 __req_mod(req, DISCARD_WRITE, NULL);
1627 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1628
1629 while (__drbd_make_request(mdev, bio, start_time))
1630 /* retry */ ;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001631 return 0;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001632}
1633
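/* Queue w_restart_write for every postponed local request overlapping the
 * given area, once the conflicting peer write has completed. */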
1634static void restart_conflicting_writes(struct drbd_conf *mdev,
1635 sector_t sector, int size)
1636{
1637 struct drbd_interval *i;
1638 struct drbd_request *req;
1639
1640 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1641 if (!i->local)
1642 continue;
1643 req = container_of(i, struct drbd_request, i);
1644 if (req->rq_state & RQ_LOCAL_PENDING ||
1645 !(req->rq_state & RQ_POSTPONED))
1646 continue;
1647 if (expect(list_empty(&req->w.list))) {
1648 req->w.mdev = mdev;
1649 req->w.cb = w_restart_write;
1650 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1651 }
1652 }
1653}
1654
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655/* e_end_block() is called via drbd_process_done_ee().
1656 * this means this function only runs in the asender thread
1657 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001658static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001660 struct drbd_peer_request *peer_req =
1661 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001662 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001663 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001664 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665
Philipp Reisner89e58e72011-01-19 13:12:45 +01001666 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001667 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1669 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001670 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001672 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001674 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001676 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 /* we expect it to be marked out of sync anyways...
1678 * maybe assert this? */
1679 }
1680 dec_unacked(mdev);
1681 }
1682 /* we delete from the conflict detection hash _after_ we sent out the
1683 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001684 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001685 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001686 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1687 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001688 if (peer_req->flags & EE_RESTART_REQUESTS)
1689 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001690 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001691 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001692 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001693
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001694 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001696 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001697}
1698
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001699static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001701 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001702 struct drbd_peer_request *peer_req =
1703 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001704 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001706 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001707 dec_unacked(mdev);
1708
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001709 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001710}
1711
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001712static int e_send_discard_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001713{
1714 return e_send_ack(w, P_DISCARD_WRITE);
1715}
1716
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001717static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001718{
1719 struct drbd_tconn *tconn = w->mdev->tconn;
1720
1721 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1722 P_RETRY_WRITE : P_DISCARD_WRITE);
1723}
1724
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001725static bool seq_greater(u32 a, u32 b)
1726{
1727 /*
1728 * We assume 32-bit wrap-around here.
1729 * For 24-bit wrap-around, we would have to shift:
1730 * a <<= 8; b <<= 8;
1731 */
1732 return (s32)a - (s32)b > 0;
1733}
1734
1735static u32 seq_max(u32 a, u32 b)
1736{
1737 return seq_greater(a, b) ? a : b;
1738}
1739
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001740static bool need_peer_seq(struct drbd_conf *mdev)
1741{
1742 struct drbd_tconn *tconn = mdev->tconn;
1743
1744 /*
1745 * We only need to keep track of the last packet_seq number of our peer
1746 * if we are in dual-primary mode and we have the discard flag set; see
1747 * handle_write_conflicts().
1748 */
1749 return tconn->net_conf->two_primaries &&
1750 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1751}
1752
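/* Track the highest sequence number seen from the peer; wake up waiters in
 * wait_for_and_update_peer_seq() whenever it advances. */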
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001753static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001754{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001755 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001756
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001757 if (need_peer_seq(mdev)) {
1758 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001759 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1760 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001761 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001762 /* wake up only if we actually changed mdev->peer_seq */
1763 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001764 wake_up(&mdev->seq_wait);
1765 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001766}
1767
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768/* Called from receive_Data.
1769 * Synchronize packets on sock with packets on msock.
1770 *
1771 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1772 * packet traveling on msock, they are still processed in the order they have
1773 * been sent.
1774 *
1775 * Note: we don't care for Ack packets overtaking P_DATA packets.
1776 *
 1777 * In case peer_seq is larger than mdev->peer_seq, there are
 1778 * outstanding packets on the msock. We wait for them to arrive.
 1779 * In case this is the logically next packet, we update mdev->peer_seq
1780 * ourselves. Correctly handles 32bit wrap around.
1781 *
1782 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1783 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1784 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 1785 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1786 *
1787 * returns 0 if we may process the packet,
 1788 * -ERESTARTSYS if we were interrupted (by disconnect signal),
 * -ETIMEDOUT if the missing packets did not arrive within the ping timeout. */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001789static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790{
1791 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001792 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001793 int ret;
1794
1795 if (!need_peer_seq(mdev))
1796 return 0;
1797
Philipp Reisnerb411b362009-09-25 16:07:19 -07001798 spin_lock(&mdev->peer_seq_lock);
1799 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001800 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1801 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1802 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001804 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 if (signal_pending(current)) {
1806 ret = -ERESTARTSYS;
1807 break;
1808 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001809 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001810 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001811 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1812 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001813 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001814 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001816 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001817 break;
1818 }
1819 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001821 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001822 return ret;
1823}
1824
Lars Ellenberg688593c2010-11-17 22:25:03 +01001825/* see also bio_flags_to_wire()
1826 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1827 * flags and back. We may replicate to other kernel versions. */
1828static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001829{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001830 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1831 (dpf & DP_FUA ? REQ_FUA : 0) |
1832 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1833 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001834}
1835
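/* Negatively complete all postponed local requests overlapping the given
 * area; used when waiting for a write conflict resolution is aborted. */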
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001836static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1837 unsigned int size)
1838{
1839 struct drbd_interval *i;
1840
1841 repeat:
1842 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1843 struct drbd_request *req;
1844 struct bio_and_error m;
1845
1846 if (!i->local)
1847 continue;
1848 req = container_of(i, struct drbd_request, i);
1849 if (!(req->rq_state & RQ_POSTPONED))
1850 continue;
1851 req->rq_state &= ~RQ_POSTPONED;
1852 __req_mod(req, NEG_ACKED, &m);
1853 spin_unlock_irq(&mdev->tconn->req_lock);
1854 if (m.bio)
1855 complete_master_bio(mdev, &m);
1856 spin_lock_irq(&mdev->tconn->req_lock);
1857 goto repeat;
1858 }
1859}
1860
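/* Detect and resolve write conflicts with the peer in dual-primary mode.
 * Inserts the peer request into the write_requests tree; if we hold the
 * discard flag, overlapping peer requests are discarded or retried right here
 * (returning -ENOENT), otherwise we wait for the conflicting local request to
 * finish. Returns 0 when the peer request may be submitted. */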
1861static int handle_write_conflicts(struct drbd_conf *mdev,
1862 struct drbd_peer_request *peer_req)
1863{
1864 struct drbd_tconn *tconn = mdev->tconn;
1865 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1866 sector_t sector = peer_req->i.sector;
1867 const unsigned int size = peer_req->i.size;
1868 struct drbd_interval *i;
1869 bool equal;
1870 int err;
1871
1872 /*
1873 * Inserting the peer request into the write_requests tree will prevent
1874 * new conflicting local requests from being added.
1875 */
1876 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1877
1878 repeat:
1879 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1880 if (i == &peer_req->i)
1881 continue;
1882
1883 if (!i->local) {
1884 /*
1885 * Our peer has sent a conflicting remote request; this
1886 * should not happen in a two-node setup. Wait for the
1887 * earlier peer request to complete.
1888 */
1889 err = drbd_wait_misc(mdev, i);
1890 if (err)
1891 goto out;
1892 goto repeat;
1893 }
1894
1895 equal = i->sector == sector && i->size == size;
1896 if (resolve_conflicts) {
1897 /*
1898 * If the peer request is fully contained within the
1899 * overlapping request, it can be discarded; otherwise,
1900 * it will be retried once all overlapping requests
1901 * have completed.
1902 */
1903 bool discard = i->sector <= sector && i->sector +
1904 (i->size >> 9) >= sector + (size >> 9);
1905
1906 if (!equal)
1907 dev_alert(DEV, "Concurrent writes detected: "
1908 "local=%llus +%u, remote=%llus +%u, "
1909 "assuming %s came first\n",
1910 (unsigned long long)i->sector, i->size,
1911 (unsigned long long)sector, size,
1912 discard ? "local" : "remote");
1913
1914 inc_unacked(mdev);
1915 peer_req->w.cb = discard ? e_send_discard_write :
1916 e_send_retry_write;
1917 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1918 wake_asender(mdev->tconn);
1919
1920 err = -ENOENT;
1921 goto out;
1922 } else {
1923 struct drbd_request *req =
1924 container_of(i, struct drbd_request, i);
1925
1926 if (!equal)
1927 dev_alert(DEV, "Concurrent writes detected: "
1928 "local=%llus +%u, remote=%llus +%u\n",
1929 (unsigned long long)i->sector, i->size,
1930 (unsigned long long)sector, size);
1931
1932 if (req->rq_state & RQ_LOCAL_PENDING ||
1933 !(req->rq_state & RQ_POSTPONED)) {
1934 /*
1935 * Wait for the node with the discard flag to
1936 * decide if this request will be discarded or
1937 * retried. Requests that are discarded will
1938 * disappear from the write_requests tree.
1939 *
1940 * In addition, wait for the conflicting
1941 * request to finish locally before submitting
1942 * the conflicting peer request.
1943 */
1944 err = drbd_wait_misc(mdev, &req->i);
1945 if (err) {
1946 _conn_request_state(mdev->tconn,
1947 NS(conn, C_TIMEOUT),
1948 CS_HARD);
1949 fail_postponed_requests(mdev, sector, size);
1950 goto out;
1951 }
1952 goto repeat;
1953 }
1954 /*
1955 * Remember to restart the conflicting requests after
1956 * the new peer request has completed.
1957 */
1958 peer_req->flags |= EE_RESTART_REQUESTS;
1959 }
1960 }
1961 err = 0;
1962
1963 out:
1964 if (err)
1965 drbd_remove_epoch_entry_interval(mdev, peer_req);
1966 return err;
1967}
1968
Philipp Reisnerb411b362009-09-25 16:07:19 -07001969/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001970static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001972 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001973 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001974 struct drbd_peer_request *peer_req;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001975 struct p_data *p = tconn->data.rbuf;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001976 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977 int rw = WRITE;
1978 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001979 int err;
1980
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001981 mdev = vnr_to_mdev(tconn, pi->vnr);
1982 if (!mdev)
1983 return -EIO;
1984
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001986 int err2;
1987
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001988 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001989 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001990 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001991 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001992 if (!err)
1993 err = err2;
1994 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001995 }
1996
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001997 /*
1998 * Corresponding put_ldev done either below (on various errors), or in
1999 * drbd_peer_request_endio, if we successfully submit the data at the
2000 * end of this function.
2001 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002002
2003 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002004 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002005 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002006 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002007 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002008 }
2009
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002010 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011
Lars Ellenberg688593c2010-11-17 22:25:03 +01002012 dp_flags = be32_to_cpu(p->dp_flags);
2013 rw |= wire_flags_to_bio(mdev, dp_flags);
2014
2015 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002016 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002017
Philipp Reisnerb411b362009-09-25 16:07:19 -07002018 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002019 peer_req->epoch = mdev->current_epoch;
2020 atomic_inc(&peer_req->epoch->epoch_size);
2021 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022 spin_unlock(&mdev->epoch_lock);
2023
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002024 if (mdev->tconn->net_conf->two_primaries) {
2025 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2026 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002028 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002029 err = handle_write_conflicts(mdev, peer_req);
2030 if (err) {
2031 spin_unlock_irq(&mdev->tconn->req_lock);
2032 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002033 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002034 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002036 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002037 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002038 } else
2039 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002040 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002041 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042
Philipp Reisner89e58e72011-01-19 13:12:45 +01002043 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002044 case DRBD_PROT_C:
2045 inc_unacked(mdev);
 2046 /* corresponding dec_unacked() in e_end_block(),
 2047 * or in _drbd_clear_done_ee(), respectively */
2048 break;
2049 case DRBD_PROT_B:
2050 /* I really don't like it that the receiver thread
2051 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002052 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002053 break;
2054 case DRBD_PROT_A:
2055 /* nothing to do */
2056 break;
2057 }
2058
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002059 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002060 /* In case we have the only disk of the cluster, note the area as out of sync and cover it in the activity log. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002061 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2062 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2063 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg181286a2011-03-31 15:18:56 +02002064 drbd_al_begin_io(mdev, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002065 }
2066
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002067 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2068 if (!err)
2069 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002070
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002071 /* don't care for the reason here */
2072 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002073 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002074 list_del(&peer_req->w.list);
2075 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002076 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002077 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002078 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002079
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002081 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002083 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002084 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002085}
2086
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002087/* We may throttle resync, if the lower device seems to be busy,
2088 * and current sync rate is above c_min_rate.
2089 *
2090 * To decide whether or not the lower device is busy, we use a scheme similar
 2091 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 2092 * activity (more than 64 sectors) that we cannot account for with our own
 2093 * resync activity, it obviously is "busy".
2094 *
2095 * The current sync rate used here uses only the most recent two step marks,
2096 * to have a short time average so we can react faster.
2097 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002098int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002099{
2100 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2101 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002102 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002103 int curr_events;
2104 int throttle = 0;
2105
2106 /* feature disabled? */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002107 if (mdev->ldev->dc.c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002108 return 0;
2109
Philipp Reisnere3555d82010-11-07 15:56:29 +01002110 spin_lock_irq(&mdev->al_lock);
2111 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2112 if (tmp) {
2113 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2114 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2115 spin_unlock_irq(&mdev->al_lock);
2116 return 0;
2117 }
2118 /* Do not slow down if app IO is already waiting for this extent */
2119 }
2120 spin_unlock_irq(&mdev->al_lock);
2121
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002122 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2123 (int)part_stat_read(&disk->part0, sectors[1]) -
2124 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002125
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002126 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2127 unsigned long rs_left;
2128 int i;
2129
2130 mdev->rs_last_events = curr_events;
2131
2132 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2133 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002134 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2135
2136 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2137 rs_left = mdev->ov_left;
2138 else
2139 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002140
2141 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2142 if (!dt)
2143 dt++;
2144 db = mdev->rs_mark_left[i] - rs_left;
2145 dbdt = Bit2KB(db/dt);
2146
Lars Ellenbergf3990022011-03-23 14:31:09 +01002147 if (dbdt > mdev->ldev->dc.c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002148 throttle = 1;
2149 }
2150 return throttle;
2151}
2152
2153
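/* Handle the read-type requests from the peer (P_DATA_REQUEST,
 * P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST, P_OV_REQUEST, P_OV_REPLY):
 * allocate a peer request, read the block from the local disk, and let the
 * worker callback send the reply. Resync reads may be throttled first. */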
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002154static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002155{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002156 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002157 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002158 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002159 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002161 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002162 unsigned int fault_type;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002163 struct p_block_req *p = tconn->data.rbuf;
2164
2165 mdev = vnr_to_mdev(tconn, pi->vnr);
2166 if (!mdev)
2167 return -EIO;
2168 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169
2170 sector = be64_to_cpu(p->sector);
2171 size = be32_to_cpu(p->blksize);
2172
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002173 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2175 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002176 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002177 }
2178 if (sector + (size>>9) > capacity) {
2179 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2180 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002181 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182 }
2183
2184 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002185 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002186 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002187 case P_DATA_REQUEST:
2188 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2189 break;
2190 case P_RS_DATA_REQUEST:
2191 case P_CSUM_RS_REQUEST:
2192 case P_OV_REQUEST:
2193 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2194 break;
2195 case P_OV_REPLY:
2196 verb = 0;
2197 dec_rs_pending(mdev);
2198 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2199 break;
2200 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002201 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002202 }
2203 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002204 dev_err(DEV, "Can not satisfy peer's read request, "
2205 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002206
Lars Ellenberga821cc42010-09-06 12:31:37 +02002207 /* drain the payload, if any */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002208 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209 }
2210
2211 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2212 * "criss-cross" setup, that might cause write-out on some other DRBD,
2213 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002214 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2215 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002217 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002218 }
2219
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002220 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002221 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002222 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002224 /* application IO, don't drbd_rs_begin_io */
2225 goto submit;
2226
Philipp Reisnerb411b362009-09-25 16:07:19 -07002227 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002228 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002229 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002230 /* used in the sector offset progress display */
2231 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002232 break;
2233
2234 case P_OV_REPLY:
2235 case P_CSUM_RS_REQUEST:
2236 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002237 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002238 if (!di)
2239 goto out_free_e;
2240
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002241 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002242 di->digest = (((char *)di)+sizeof(struct digest_info));
2243
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002244 peer_req->digest = di;
2245 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002246
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002247 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 goto out_free_e;
2249
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002250 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002251 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002252 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002253 /* used in the sector offset progress display */
2254 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002255 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002256 /* track progress, we may need to throttle */
2257 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002258 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002259 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002260 /* drbd_rs_begin_io done when we sent this request,
2261 * but accounting still needs to be done. */
2262 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002263 }
2264 break;
2265
2266 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002267 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002268 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002269 unsigned long now = jiffies;
2270 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002271 mdev->ov_start_sector = sector;
2272 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002273 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2274 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002275 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2276 mdev->rs_mark_left[i] = mdev->ov_left;
2277 mdev->rs_mark_time[i] = now;
2278 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002279 dev_info(DEV, "Online Verify start sector: %llu\n",
2280 (unsigned long long)sector);
2281 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002282 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002283 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002284 break;
2285
Philipp Reisnerb411b362009-09-25 16:07:19 -07002286 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002287 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 }
2289
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002290 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2291 * wrt the receiver, but it is not as straightforward as it may seem.
2292 * Various places in the resync start and stop logic assume resync
2293 * requests are processed in order, requeuing this on the worker thread
2294 * introduces a bunch of new code for synchronization between threads.
2295 *
2296 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2297 * "forever", throttling after drbd_rs_begin_io will lock that extent
2298 * for application writes for the same time. For now, just throttle
2299 * here, where the rest of the code expects the receiver to sleep for
2300 * a while, anyways.
2301 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002302
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002303 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2304 * this defers syncer requests for some time, before letting at least
 2305 * one request through. The resync controller on the receiving side
2306 * will adapt to the incoming rate accordingly.
2307 *
2308 * We cannot throttle here if remote is Primary/SyncTarget:
2309 * we would also throttle its application reads.
2310 * In that case, throttling is done on the SyncTarget only.
2311 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002312 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2313 schedule_timeout_uninterruptible(HZ/10);
2314 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002315 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002317submit_for_resync:
2318 atomic_add(size >> 9, &mdev->rs_sect_ev);
2319
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002320submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002321 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002322 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002323 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002324 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002325
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002326 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002327 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002328
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002329 /* don't care for the reason here */
2330 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002331 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002332 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002333 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002334 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2335
Philipp Reisnerb411b362009-09-25 16:07:19 -07002336out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002337 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002338 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002339 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002340}
2341
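/* After-split-brain auto recovery with zero primaries: returns 1 to discard
 * the peer's data, -1 to discard our own, or -100 if no automatic decision
 * can be made. */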
2342static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2343{
2344 int self, peer, rv = -100;
2345 unsigned long ch_self, ch_peer;
2346
2347 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2348 peer = mdev->p_uuid[UI_BITMAP] & 1;
2349
2350 ch_peer = mdev->p_uuid[UI_SIZE];
2351 ch_self = mdev->comm_bm_set;
2352
Philipp Reisner89e58e72011-01-19 13:12:45 +01002353 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354 case ASB_CONSENSUS:
2355 case ASB_DISCARD_SECONDARY:
2356 case ASB_CALL_HELPER:
2357 dev_err(DEV, "Configuration error.\n");
2358 break;
2359 case ASB_DISCONNECT:
2360 break;
2361 case ASB_DISCARD_YOUNGER_PRI:
2362 if (self == 0 && peer == 1) {
2363 rv = -1;
2364 break;
2365 }
2366 if (self == 1 && peer == 0) {
2367 rv = 1;
2368 break;
2369 }
2370 /* Else fall through to one of the other strategies... */
2371 case ASB_DISCARD_OLDER_PRI:
2372 if (self == 0 && peer == 1) {
2373 rv = 1;
2374 break;
2375 }
2376 if (self == 1 && peer == 0) {
2377 rv = -1;
2378 break;
2379 }
2380 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002381 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002382 "Using discard-least-changes instead\n");
2383 case ASB_DISCARD_ZERO_CHG:
2384 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002385 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002386 ? -1 : 1;
2387 break;
2388 } else {
2389 if (ch_peer == 0) { rv = 1; break; }
2390 if (ch_self == 0) { rv = -1; break; }
2391 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002392 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393 break;
2394 case ASB_DISCARD_LEAST_CHG:
2395 if (ch_self < ch_peer)
2396 rv = -1;
2397 else if (ch_self > ch_peer)
2398 rv = 1;
2399 else /* ( ch_self == ch_peer ) */
2400 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002401 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002402 ? -1 : 1;
2403 break;
2404 case ASB_DISCARD_LOCAL:
2405 rv = -1;
2406 break;
2407 case ASB_DISCARD_REMOTE:
2408 rv = 1;
2409 }
2410
2411 return rv;
2412}
2413
2414static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2415{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002416 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417
Philipp Reisner89e58e72011-01-19 13:12:45 +01002418 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002419 case ASB_DISCARD_YOUNGER_PRI:
2420 case ASB_DISCARD_OLDER_PRI:
2421 case ASB_DISCARD_LEAST_CHG:
2422 case ASB_DISCARD_LOCAL:
2423 case ASB_DISCARD_REMOTE:
2424 dev_err(DEV, "Configuration error.\n");
2425 break;
2426 case ASB_DISCONNECT:
2427 break;
2428 case ASB_CONSENSUS:
2429 hg = drbd_asb_recover_0p(mdev);
2430 if (hg == -1 && mdev->state.role == R_SECONDARY)
2431 rv = hg;
2432 if (hg == 1 && mdev->state.role == R_PRIMARY)
2433 rv = hg;
2434 break;
2435 case ASB_VIOLENTLY:
2436 rv = drbd_asb_recover_0p(mdev);
2437 break;
2438 case ASB_DISCARD_SECONDARY:
2439 return mdev->state.role == R_PRIMARY ? 1 : -1;
2440 case ASB_CALL_HELPER:
2441 hg = drbd_asb_recover_0p(mdev);
2442 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002443 enum drbd_state_rv rv2;
2444
2445 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002446 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2447 * we might be here in C_WF_REPORT_PARAMS which is transient.
2448 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002449 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2450 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451 drbd_khelper(mdev, "pri-lost-after-sb");
2452 } else {
2453 dev_warn(DEV, "Successfully gave up primary role.\n");
2454 rv = hg;
2455 }
2456 } else
2457 rv = hg;
2458 }
2459
2460 return rv;
2461}
2462
2463static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2464{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002465 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002466
Philipp Reisner89e58e72011-01-19 13:12:45 +01002467 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002468 case ASB_DISCARD_YOUNGER_PRI:
2469 case ASB_DISCARD_OLDER_PRI:
2470 case ASB_DISCARD_LEAST_CHG:
2471 case ASB_DISCARD_LOCAL:
2472 case ASB_DISCARD_REMOTE:
2473 case ASB_CONSENSUS:
2474 case ASB_DISCARD_SECONDARY:
2475 dev_err(DEV, "Configuration error.\n");
2476 break;
2477 case ASB_VIOLENTLY:
2478 rv = drbd_asb_recover_0p(mdev);
2479 break;
2480 case ASB_DISCONNECT:
2481 break;
2482 case ASB_CALL_HELPER:
2483 hg = drbd_asb_recover_0p(mdev);
2484 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002485 enum drbd_state_rv rv2;
2486
Philipp Reisnerb411b362009-09-25 16:07:19 -07002487 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2488 * we might be here in C_WF_REPORT_PARAMS which is transient.
2489 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002490 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2491 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002492 drbd_khelper(mdev, "pri-lost-after-sb");
2493 } else {
2494 dev_warn(DEV, "Successfully gave up primary role.\n");
2495 rv = hg;
2496 }
2497 } else
2498 rv = hg;
2499 }
2500
2501 return rv;
2502}
2503
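/*
 * Illustrative sketch (not part of the original source; the helper name is
 * hypothetical): the three drbd_asb_recover_*p() helpers above share one
 * convention: a negative result means this node discards its changes and
 * becomes SyncTarget, a positive result means the peer's changes are
 * discarded, and -100 means the split brain could not be resolved
 * automatically.
 */
static const char *asb_result_to_text(int rv)
{
	if (rv == -100)
		return "split brain unresolved";
	return rv < 0 ? "discard local changes (become SyncTarget)"
		      : "discard peer's changes (become SyncSource)";
}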
2504static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2505 u64 bits, u64 flags)
2506{
2507 if (!uuid) {
2508 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2509 return;
2510 }
2511 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2512 text,
2513 (unsigned long long)uuid[UI_CURRENT],
2514 (unsigned long long)uuid[UI_BITMAP],
2515 (unsigned long long)uuid[UI_HISTORY_START],
2516 (unsigned long long)uuid[UI_HISTORY_END],
2517 (unsigned long long)bits,
2518 (unsigned long long)flags);
2519}
2520
2521/*
2522 100 after split brain try auto recover
2523 2 C_SYNC_SOURCE set BitMap
2524 1 C_SYNC_SOURCE use BitMap
2525 0 no Sync
2526 -1 C_SYNC_TARGET use BitMap
2527 -2 C_SYNC_TARGET set BitMap
2528 -100 after split brain, disconnect
2529-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002530-1091 requires proto 91
2531-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002532 */
2533static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2534{
2535 u64 self, peer;
2536 int i, j;
2537
2538 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2539 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2540
2541 *rule_nr = 10;
2542 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2543 return 0;
2544
2545 *rule_nr = 20;
2546 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2547 peer != UUID_JUST_CREATED)
2548 return -2;
2549
2550 *rule_nr = 30;
2551 if (self != UUID_JUST_CREATED &&
2552 (peer == UUID_JUST_CREATED || peer == (u64)0))
2553 return 2;
2554
2555 if (self == peer) {
2556 int rct, dc; /* roles at crash time */
2557
2558 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2559
Philipp Reisner31890f42011-01-19 14:12:51 +01002560 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002561 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002562
2563 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2564 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2565 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2566 drbd_uuid_set_bm(mdev, 0UL);
2567
2568 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2569 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2570 *rule_nr = 34;
2571 } else {
2572 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2573 *rule_nr = 36;
2574 }
2575
2576 return 1;
2577 }
2578
2579 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2580
Philipp Reisner31890f42011-01-19 14:12:51 +01002581 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002582 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002583
2584 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2585 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2586 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2587
2588 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2589 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2590 mdev->p_uuid[UI_BITMAP] = 0UL;
2591
2592 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2593 *rule_nr = 35;
2594 } else {
2595 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2596 *rule_nr = 37;
2597 }
2598
2599 return -1;
2600 }
2601
2602 /* Common power [off|failure] */
2603 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2604 (mdev->p_uuid[UI_FLAGS] & 2);
2605 /* lowest bit is set when we were primary,
2606 * next bit (weight 2) is set when peer was primary */
2607 *rule_nr = 40;
2608
2609 switch (rct) {
2610 case 0: /* !self_pri && !peer_pri */ return 0;
2611 case 1: /* self_pri && !peer_pri */ return 1;
2612 case 2: /* !self_pri && peer_pri */ return -1;
2613 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002614 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002615 return dc ? -1 : 1;
2616 }
2617 }
2618
2619 *rule_nr = 50;
2620 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2621 if (self == peer)
2622 return -1;
2623
2624 *rule_nr = 51;
2625 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2626 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002627 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002628 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2629 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2630 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002631			/* The last P_SYNC_UUID did not get through. Undo the sync-source
2632			   modifications of the peer's UUIDs made at the last start of resync. */
2633
Philipp Reisner31890f42011-01-19 14:12:51 +01002634 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002635 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636
2637 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2638 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002639
2640			dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2641 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2642
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643 return -1;
2644 }
2645 }
2646
2647 *rule_nr = 60;
2648 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2649 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2650 peer = mdev->p_uuid[i] & ~((u64)1);
2651 if (self == peer)
2652 return -2;
2653 }
2654
2655 *rule_nr = 70;
2656 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2657 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2658 if (self == peer)
2659 return 1;
2660
2661 *rule_nr = 71;
2662 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2663 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002664 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002665 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2666 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2667 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668			/* The last P_SYNC_UUID did not get through. Undo the sync-source
2669			   modifications of our UUIDs made at the last start of resync. */
2670
Philipp Reisner31890f42011-01-19 14:12:51 +01002671 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002672 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002673
2674 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2675 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2676
Philipp Reisner4a23f262011-01-11 17:42:17 +01002677 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2679 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2680
2681 return 1;
2682 }
2683 }
2684
2685
2686 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002687 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002688 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2689 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2690 if (self == peer)
2691 return 2;
2692 }
2693
2694 *rule_nr = 90;
2695 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2696 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2697 if (self == peer && self != ((u64)0))
2698 return 100;
2699
2700 *rule_nr = 100;
2701 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2702 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2703 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2704 peer = mdev->p_uuid[j] & ~((u64)1);
2705 if (self == peer)
2706 return -100;
2707 }
2708 }
2709
2710 return -1000;
2711}
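
/*
 * Minimal sketch (not part of the original source; the helper name is
 * hypothetical): throughout drbd_uuid_compare() the lowest UUID bit is
 * masked off with & ~((u64)1), because bit 0 is only a flag
 * (drbd_asb_recover_0p() above reads it to tell which side was primary);
 * two UUIDs differing only in that bit still name the same data generation.
 */
static inline int uuid_same_generation(u64 a, u64 b)
{
	return (a & ~(u64)1) == (b & ~(u64)1);
}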
2712
2713/* drbd_sync_handshake() returns the new conn state on success, or
2714   C_MASK (-1) on failure.
2715 */
2716static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2717 enum drbd_disk_state peer_disk) __must_hold(local)
2718{
2719 int hg, rule_nr;
2720 enum drbd_conns rv = C_MASK;
2721 enum drbd_disk_state mydisk;
2722
2723 mydisk = mdev->state.disk;
2724 if (mydisk == D_NEGOTIATING)
2725 mydisk = mdev->new_state_tmp.disk;
2726
2727 dev_info(DEV, "drbd_sync_handshake:\n");
2728 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2729 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2730 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2731
2732 hg = drbd_uuid_compare(mdev, &rule_nr);
2733
2734 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2735
2736 if (hg == -1000) {
2737 dev_alert(DEV, "Unrelated data, aborting!\n");
2738 return C_MASK;
2739 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002740 if (hg < -1000) {
2741 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742 return C_MASK;
2743 }
2744
2745 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2746 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2747 int f = (hg == -100) || abs(hg) == 2;
2748 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2749 if (f)
2750 hg = hg*2;
2751 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2752 hg > 0 ? "source" : "target");
2753 }
2754
Adam Gandelman3a11a482010-04-08 16:48:23 -07002755 if (abs(hg) == 100)
2756 drbd_khelper(mdev, "initial-split-brain");
2757
Philipp Reisner89e58e72011-01-19 13:12:45 +01002758 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 int pcount = (mdev->state.role == R_PRIMARY)
2760 + (peer_role == R_PRIMARY);
2761 int forced = (hg == -100);
2762
2763 switch (pcount) {
2764 case 0:
2765 hg = drbd_asb_recover_0p(mdev);
2766 break;
2767 case 1:
2768 hg = drbd_asb_recover_1p(mdev);
2769 break;
2770 case 2:
2771 hg = drbd_asb_recover_2p(mdev);
2772 break;
2773 }
2774 if (abs(hg) < 100) {
2775 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2776 "automatically solved. Sync from %s node\n",
2777 pcount, (hg < 0) ? "peer" : "this");
2778 if (forced) {
2779 dev_warn(DEV, "Doing a full sync, since"
2780					" UUIDs were ambiguous.\n");
2781 hg = hg*2;
2782 }
2783 }
2784 }
2785
2786 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002787 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002788 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002789 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 hg = 1;
2791
2792 if (abs(hg) < 100)
2793 dev_warn(DEV, "Split-Brain detected, manually solved. "
2794 "Sync from %s node\n",
2795 (hg < 0) ? "peer" : "this");
2796 }
2797
2798 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002799 /* FIXME this log message is not correct if we end up here
2800 * after an attempted attach on a diskless node.
2801 * We just refuse to attach -- well, we drop the "connection"
2802 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002803 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002804 drbd_khelper(mdev, "split-brain");
2805 return C_MASK;
2806 }
2807
2808 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2809 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2810 return C_MASK;
2811 }
2812
2813 if (hg < 0 && /* by intention we do not use mydisk here. */
2814 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002815 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002816 case ASB_CALL_HELPER:
2817 drbd_khelper(mdev, "pri-lost");
2818 /* fall through */
2819 case ASB_DISCONNECT:
2820 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2821 return C_MASK;
2822 case ASB_VIOLENTLY:
2823 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2824			     " assumption\n");
2825 }
2826 }
2827
Philipp Reisner8169e412011-03-15 18:40:27 +01002828 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002829 if (hg == 0)
2830 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2831 else
2832 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2833 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2834 abs(hg) >= 2 ? "full" : "bit-map based");
2835 return C_MASK;
2836 }
2837
Philipp Reisnerb411b362009-09-25 16:07:19 -07002838 if (abs(hg) >= 2) {
2839 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002840 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2841 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002842 return C_MASK;
2843 }
2844
2845 if (hg > 0) { /* become sync source. */
2846 rv = C_WF_BITMAP_S;
2847 } else if (hg < 0) { /* become sync target */
2848 rv = C_WF_BITMAP_T;
2849 } else {
2850 rv = C_CONNECTED;
2851 if (drbd_bm_total_weight(mdev)) {
2852 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2853 drbd_bm_total_weight(mdev));
2854 }
2855 }
2856
2857 return rv;
2858}
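
/*
 * Worked example (not part of the original source), reading the result table
 * above drbd_uuid_compare(): hg = -2 means this node becomes SyncTarget and
 * the whole bitmap is set (full sync); hg = 1 means it becomes SyncSource
 * reusing the existing bitmap; |hg| == 100 signals split brain.  The tail of
 * drbd_sync_handshake() then maps hg > 0 to C_WF_BITMAP_S, hg < 0 to
 * C_WF_BITMAP_T and hg == 0 to C_CONNECTED.
 */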
2859
2860/* returns 1 if invalid */
2861static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2862{
2863 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2864 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2865 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2866 return 0;
2867
2868 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2869 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2870 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2871 return 1;
2872
2873 /* everything else is valid if they are equal on both sides. */
2874 if (peer == self)
2875 return 0;
2876
2877 /* everything es is invalid. */
2878	/* everything else is invalid. */
2879}
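
/*
 * Worked example (not part of the original source) for cmp_after_sb():
 *   peer = ASB_DISCARD_REMOTE, self = ASB_DISCARD_LOCAL  -> 0 (valid pair)
 *   peer = ASB_DISCARD_LOCAL,  self = ASB_DISCARD_LOCAL  -> 1 (both sides
 *     would discard their own data: invalid)
 *   peer = ASB_DISCONNECT,     self = ASB_DISCONNECT     -> 0 (equal: valid)
 */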
2880
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002881static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002882{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002883 struct p_protocol *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002885 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002886 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2887
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888 p_proto = be32_to_cpu(p->protocol);
2889 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2890 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2891 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002893 cf = be32_to_cpu(p->conn_flags);
2894 p_want_lose = cf & CF_WANT_LOSE;
2895
Philipp Reisner72046242011-03-15 18:51:47 +01002896 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002897
2898 if (cf & CF_DRY_RUN)
Philipp Reisner72046242011-03-15 18:51:47 +01002899 set_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
Philipp Reisner72046242011-03-15 18:51:47 +01002901 if (p_proto != tconn->net_conf->wire_protocol) {
2902 conn_err(tconn, "incompatible communication protocols\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002903 goto disconnect;
2904 }
2905
Philipp Reisner72046242011-03-15 18:51:47 +01002906 if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
2907 conn_err(tconn, "incompatible after-sb-0pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002908 goto disconnect;
2909 }
2910
Philipp Reisner72046242011-03-15 18:51:47 +01002911 if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
2912 conn_err(tconn, "incompatible after-sb-1pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002913 goto disconnect;
2914 }
2915
Philipp Reisner72046242011-03-15 18:51:47 +01002916 if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
2917 conn_err(tconn, "incompatible after-sb-2pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002918 goto disconnect;
2919 }
2920
Philipp Reisner72046242011-03-15 18:51:47 +01002921 if (p_want_lose && tconn->net_conf->want_lose) {
2922 conn_err(tconn, "both sides have the 'want_lose' flag set\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 goto disconnect;
2924 }
2925
Philipp Reisner72046242011-03-15 18:51:47 +01002926 if (p_two_primaries != tconn->net_conf->two_primaries) {
2927 conn_err(tconn, "incompatible setting of the two-primaries options\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928 goto disconnect;
2929 }
2930
Philipp Reisner72046242011-03-15 18:51:47 +01002931 if (tconn->agreed_pro_version >= 87) {
2932 unsigned char *my_alg = tconn->net_conf->integrity_alg;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002933 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002934
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002935 err = drbd_recv_all(tconn, p_integrity_alg, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002936 if (err)
2937 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002938
2939 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2940 if (strcmp(p_integrity_alg, my_alg)) {
Philipp Reisner72046242011-03-15 18:51:47 +01002941 conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002942 goto disconnect;
2943 }
Philipp Reisner72046242011-03-15 18:51:47 +01002944 conn_info(tconn, "data-integrity-alg: %s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002945 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2946 }
2947
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002948 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002949
2950disconnect:
Philipp Reisner72046242011-03-15 18:51:47 +01002951 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002952 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953}
2954
2955/* helper function
2956 * input: alg name, feature name
2957 * return: NULL (alg name was "")
2958 * ERR_PTR(error) if something goes wrong
2959 * or the crypto hash ptr, if it worked out ok. */
2960struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2961 const char *alg, const char *name)
2962{
2963 struct crypto_hash *tfm;
2964
2965 if (!alg[0])
2966 return NULL;
2967
2968 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2969 if (IS_ERR(tfm)) {
2970 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2971 alg, name, PTR_ERR(tfm));
2972 return tfm;
2973 }
2974 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2975 crypto_free_hash(tfm);
2976 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2977 return ERR_PTR(-EINVAL);
2978 }
2979 return tfm;
2980}
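
/*
 * Usage sketch (not part of the original source; the helper name is
 * hypothetical): callers of drbd_crypto_alloc_digest_safe() must distinguish
 * three outcomes: NULL (empty algorithm name, feature unused), an ERR_PTR()
 * value (allocation or validation failed), or a usable tfm, exactly as
 * receive_SyncParam() does below.
 */
static inline bool digest_alloc_failed(struct crypto_hash *tfm)
{
	return tfm && IS_ERR(tfm);
}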
2981
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002982static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002983{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002984 void *buffer = tconn->data.rbuf;
2985 int size = pi->size;
2986
2987 while (size) {
2988 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
2989 s = drbd_recv(tconn, buffer, s);
2990 if (s <= 0) {
2991 if (s < 0)
2992 return s;
2993 break;
2994 }
2995 size -= s;
2996 }
2997 if (size)
2998 return -EIO;
2999 return 0;
3000}
3001
3002/*
3003 * config_unknown_volume - device configuration command for unknown volume
3004 *
3005 * When a device is added to an existing connection, the node on which the
3006 * device is added first will send configuration commands to its peer but the
3007 * peer will not know about the device yet. It will warn and ignore these
3008 * commands. Once the device is added on the second node, the second node will
3009 * send the same device configuration commands, but in the other direction.
3010 *
3011 * (We can also end up here if drbd is misconfigured.)
3012 */
3013static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3014{
3015 conn_warn(tconn, "Volume %u unknown; ignoring %s packet\n",
3016 pi->vnr, cmdname(pi->cmd));
3017 return ignore_remaining_packet(tconn, pi);
3018}
3019
3020static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3021{
3022 struct drbd_conf *mdev;
3023 struct p_rs_param_95 *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024 unsigned int header_size, data_size, exp_max_sz;
3025 struct crypto_hash *verify_tfm = NULL;
3026 struct crypto_hash *csums_tfm = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003027 const int apv = tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02003028 int *rs_plan_s = NULL;
3029 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003030 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003032 mdev = vnr_to_mdev(tconn, pi->vnr);
3033 if (!mdev)
3034 return config_unknown_volume(tconn, pi);
3035
Philipp Reisnerb411b362009-09-25 16:07:19 -07003036 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3037 : apv == 88 ? sizeof(struct p_rs_param)
3038 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003039 : apv <= 94 ? sizeof(struct p_rs_param_89)
3040 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003042 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003043 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003044 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003045 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046 }
3047
3048 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003049 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003050 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003051 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003052 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003053 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003054 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003055 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003056 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003057 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058 D_ASSERT(data_size == 0);
3059 }
3060
3061 /* initialize verify_alg and csums_alg */
3062 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3063
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003064 err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size);
3065 if (err)
3066 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003067
Lars Ellenbergf3990022011-03-23 14:31:09 +01003068 if (get_ldev(mdev)) {
3069 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3070 put_ldev(mdev);
3071 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072
3073 if (apv >= 88) {
3074 if (apv == 88) {
3075 if (data_size > SHARED_SECRET_MAX) {
3076 dev_err(DEV, "verify-alg too long, "
3077 "peer wants %u, accepting only %u byte\n",
3078				"peer wants %u, accepting only %u bytes\n",
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003079 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 }
3081
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003082 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
3083 if (err)
3084 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003085
3086 /* we expect NUL terminated string */
3087 /* but just in case someone tries to be evil */
3088 D_ASSERT(p->verify_alg[data_size-1] == 0);
3089 p->verify_alg[data_size-1] = 0;
3090
3091 } else /* apv >= 89 */ {
3092 /* we still expect NUL terminated strings */
3093 /* but just in case someone tries to be evil */
3094 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3095 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3096 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3097 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3098 }
3099
Lars Ellenbergf3990022011-03-23 14:31:09 +01003100 if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3102 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003103 mdev->tconn->net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104 goto disconnect;
3105 }
3106 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3107 p->verify_alg, "verify-alg");
3108 if (IS_ERR(verify_tfm)) {
3109 verify_tfm = NULL;
3110 goto disconnect;
3111 }
3112 }
3113
Lars Ellenbergf3990022011-03-23 14:31:09 +01003114 if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3116 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003117 mdev->tconn->net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118 goto disconnect;
3119 }
3120 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3121 p->csums_alg, "csums-alg");
3122 if (IS_ERR(csums_tfm)) {
3123 csums_tfm = NULL;
3124 goto disconnect;
3125 }
3126 }
3127
Lars Ellenbergf3990022011-03-23 14:31:09 +01003128 if (apv > 94 && get_ldev(mdev)) {
3129 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3130 mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3131 mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
3132 mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
3133 mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003134
Lars Ellenbergf3990022011-03-23 14:31:09 +01003135 fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +02003136 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3137 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3138 if (!rs_plan_s) {
3139				dev_err(DEV, "allocation of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003140 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003141 goto disconnect;
3142 }
3143 }
Lars Ellenbergf3990022011-03-23 14:31:09 +01003144 put_ldev(mdev);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003145 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003146
3147 spin_lock(&mdev->peer_seq_lock);
3148 /* lock against drbd_nl_syncer_conf() */
3149 if (verify_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003150 strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
3151 mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3152 crypto_free_hash(mdev->tconn->verify_tfm);
3153 mdev->tconn->verify_tfm = verify_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003154 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3155 }
3156 if (csums_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003157 strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
3158 mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3159 crypto_free_hash(mdev->tconn->csums_tfm);
3160 mdev->tconn->csums_tfm = csums_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3162 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003163 if (fifo_size != mdev->rs_plan_s.size) {
3164 kfree(mdev->rs_plan_s.values);
3165 mdev->rs_plan_s.values = rs_plan_s;
3166 mdev->rs_plan_s.size = fifo_size;
3167 mdev->rs_planed = 0;
3168 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 spin_unlock(&mdev->peer_seq_lock);
3170 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003171 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003172
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173disconnect:
3174 /* just for completeness: actually not needed,
3175 * as this is not reached if csums_tfm was ok. */
3176 crypto_free_hash(csums_tfm);
3177 /* but free the verify_tfm again, if csums_tfm did not work out */
3178 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003179 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003180 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003181}
3182
Philipp Reisnerb411b362009-09-25 16:07:19 -07003183/* warn if the arguments differ by more than 12.5% */
3184static void warn_if_differ_considerably(struct drbd_conf *mdev,
3185 const char *s, sector_t a, sector_t b)
3186{
3187 sector_t d;
3188 if (a == 0 || b == 0)
3189 return;
3190 d = (a > b) ? (a - b) : (b - a);
3191 if (d > (a>>3) || d > (b>>3))
3192 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3193 (unsigned long long)a, (unsigned long long)b);
3194}
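
/*
 * Worked example (not part of the original source): d > (a >> 3) tests
 * whether the difference exceeds one eighth (12.5%) of a.  With a = 1000
 * sectors and b = 900, d = 100 while a >> 3 = 125 and b >> 3 = 112, so no
 * warning is printed; with b = 850, d = 150 exceeds 125 and the sizes are
 * reported as considerably different.
 */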
3195
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003196static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003197{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003198 struct drbd_conf *mdev;
3199 struct p_sizes *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003201 sector_t p_size, p_usize, my_usize;
3202 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003203 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003204
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003205 mdev = vnr_to_mdev(tconn, pi->vnr);
3206 if (!mdev)
3207 return config_unknown_volume(tconn, pi);
3208
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209 p_size = be64_to_cpu(p->d_size);
3210 p_usize = be64_to_cpu(p->u_size);
3211
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 /* just store the peer's disk size for now.
3213 * we still need to figure out whether we accept that. */
3214 mdev->p_size = p_size;
3215
Philipp Reisnerb411b362009-09-25 16:07:19 -07003216 if (get_ldev(mdev)) {
3217 warn_if_differ_considerably(mdev, "lower level device sizes",
3218 p_size, drbd_get_max_capacity(mdev->ldev));
3219 warn_if_differ_considerably(mdev, "user requested size",
3220 p_usize, mdev->ldev->dc.disk_size);
3221
3222 /* if this is the first connect, or an otherwise expected
3223 * param exchange, choose the minimum */
3224 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3225 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3226 p_usize);
3227
3228 my_usize = mdev->ldev->dc.disk_size;
3229
3230 if (mdev->ldev->dc.disk_size != p_usize) {
3231 mdev->ldev->dc.disk_size = p_usize;
3232 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3233 (unsigned long)mdev->ldev->dc.disk_size);
3234 }
3235
3236 /* Never shrink a device with usable data during connect.
3237 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003238 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239 drbd_get_capacity(mdev->this_bdev) &&
3240 mdev->state.disk >= D_OUTDATED &&
3241 mdev->state.conn < C_CONNECTED) {
3242 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003243 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 mdev->ldev->dc.disk_size = my_usize;
3245 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003246 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003247 }
3248 put_ldev(mdev);
3249 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003250
Philipp Reisnere89b5912010-03-24 17:11:33 +01003251 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003252 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003253 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003254 put_ldev(mdev);
3255 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003256 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257 drbd_md_sync(mdev);
3258 } else {
3259 /* I am diskless, need to accept the peer's size. */
3260 drbd_set_my_capacity(mdev, p_size);
3261 }
3262
Philipp Reisner99432fc2011-05-20 16:39:13 +02003263 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3264 drbd_reconsider_max_bio_size(mdev);
3265
Philipp Reisnerb411b362009-09-25 16:07:19 -07003266 if (get_ldev(mdev)) {
3267 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3268 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3269 ldsc = 1;
3270 }
3271
Philipp Reisnerb411b362009-09-25 16:07:19 -07003272 put_ldev(mdev);
3273 }
3274
3275 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3276 if (be64_to_cpu(p->c_size) !=
3277 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3278 /* we have different sizes, probably peer
3279 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003280 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281 }
3282 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3283 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3284 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003285 mdev->state.disk >= D_INCONSISTENT) {
3286 if (ddsf & DDSF_NO_RESYNC)
3287 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3288 else
3289 resync_after_online_grow(mdev);
3290 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003291 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3292 }
3293 }
3294
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003295 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296}
3297
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003298static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003300 struct drbd_conf *mdev;
3301 struct p_uuids *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003303 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003304
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003305 mdev = vnr_to_mdev(tconn, pi->vnr);
3306 if (!mdev)
3307 return config_unknown_volume(tconn, pi);
3308
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3310
3311 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3312 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3313
3314 kfree(mdev->p_uuid);
3315 mdev->p_uuid = p_uuid;
3316
3317 if (mdev->state.conn < C_CONNECTED &&
3318 mdev->state.disk < D_INCONSISTENT &&
3319 mdev->state.role == R_PRIMARY &&
3320 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3321 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3322 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003323 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003324 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003325 }
3326
3327 if (get_ldev(mdev)) {
3328 int skip_initial_sync =
3329 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003330 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003331 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3332 (p_uuid[UI_FLAGS] & 8);
3333 if (skip_initial_sync) {
3334 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3335 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003336 "clear_n_write from receive_uuids",
3337 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003338 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3339 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3340 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3341 CS_VERBOSE, NULL);
3342 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003343 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003344 }
3345 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003346 } else if (mdev->state.disk < D_INCONSISTENT &&
3347 mdev->state.role == R_PRIMARY) {
3348 /* I am a diskless primary, the peer just created a new current UUID
3349 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003350 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003351 }
3352
3353	/* Before we test the disk state, we should wait until any possibly
3354	   ongoing cluster-wide state change is finished. That is important if
3355 we are primary and are detaching from our disk. We need to see the
3356 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003357 mutex_lock(mdev->state_mutex);
3358 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003359 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003360 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3361
3362 if (updated_uuids)
3363 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003364
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003365 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366}
3367
3368/**
3369 * convert_state() - Converts the peer's view of the cluster state to our point of view
3370 * @ps: The state as seen by the peer.
3371 */
3372static union drbd_state convert_state(union drbd_state ps)
3373{
3374 union drbd_state ms;
3375
3376 static enum drbd_conns c_tab[] = {
3377 [C_CONNECTED] = C_CONNECTED,
3378
3379 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3380 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3381 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3382 [C_VERIFY_S] = C_VERIFY_T,
3383 [C_MASK] = C_MASK,
3384 };
3385
3386 ms.i = ps.i;
3387
3388 ms.conn = c_tab[ps.conn];
3389 ms.peer = ps.role;
3390 ms.role = ps.peer;
3391 ms.pdsk = ps.disk;
3392 ms.disk = ps.pdsk;
3393 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3394
3395 return ms;
3396}
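
/*
 * Worked example (not part of the original source): if the peer reports
 * { role = R_PRIMARY, peer = R_SECONDARY, conn = C_STARTING_SYNC_S,
 *   disk = D_UP_TO_DATE, pdsk = D_INCONSISTENT }, convert_state() yields our
 * view { role = R_SECONDARY, peer = R_PRIMARY, conn = C_STARTING_SYNC_T,
 *   disk = D_INCONSISTENT, pdsk = D_UP_TO_DATE }.
 */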
3397
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003398static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003399{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003400 struct drbd_conf *mdev;
3401 struct p_req_state *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003402 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003403 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003404
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003405 mdev = vnr_to_mdev(tconn, pi->vnr);
3406 if (!mdev)
3407 return -EIO;
3408
Philipp Reisnerb411b362009-09-25 16:07:19 -07003409 mask.i = be32_to_cpu(p->mask);
3410 val.i = be32_to_cpu(p->val);
3411
Philipp Reisner25703f82011-02-07 14:35:25 +01003412 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003413 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003414 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003415 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416 }
3417
3418 mask = convert_state(mask);
3419 val = convert_state(val);
3420
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003421 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3422 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003423
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424 drbd_md_sync(mdev);
3425
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003426 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003427}
3428
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003429static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003430{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01003431 struct p_req_state *p = tconn->data.rbuf;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003432 union drbd_state mask, val;
3433 enum drbd_state_rv rv;
3434
3435 mask.i = be32_to_cpu(p->mask);
3436 val.i = be32_to_cpu(p->val);
3437
3438 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3439 mutex_is_locked(&tconn->cstate_mutex)) {
3440 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003441 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003442 }
3443
3444 mask = convert_state(mask);
3445 val = convert_state(val);
3446
Philipp Reisner778bcf22011-03-28 12:55:03 +02003447 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003448 conn_send_sr_reply(tconn, rv);
3449
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003450 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003451}
3452
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003453static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003455 struct drbd_conf *mdev;
3456 struct p_state *p = tconn->data.rbuf;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003457 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003459 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460 int rv;
3461
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003462 mdev = vnr_to_mdev(tconn, pi->vnr);
3463 if (!mdev)
3464 return config_unknown_volume(tconn, pi);
3465
Philipp Reisnerb411b362009-09-25 16:07:19 -07003466 peer_state.i = be32_to_cpu(p->state);
3467
3468 real_peer_disk = peer_state.disk;
3469 if (peer_state.disk == D_NEGOTIATING) {
3470 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3471 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3472 }
3473
Philipp Reisner87eeee42011-01-19 14:16:30 +01003474 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003475 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003476 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003477 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003479 /* peer says his disk is uptodate, while we think it is inconsistent,
3480 * and this happens while we think we have a sync going on. */
3481 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3482 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3483 /* If we are (becoming) SyncSource, but peer is still in sync
3484 * preparation, ignore its uptodate-ness to avoid flapping, it
3485 * will change to inconsistent once the peer reaches active
3486 * syncing states.
3487 * It may have changed syncer-paused flags, however, so we
3488 * cannot ignore this completely. */
3489 if (peer_state.conn > C_CONNECTED &&
3490 peer_state.conn < C_SYNC_SOURCE)
3491 real_peer_disk = D_INCONSISTENT;
3492
3493 /* if peer_state changes to connected at the same time,
3494 * it explicitly notifies us that it finished resync.
3495 * Maybe we should finish it up, too? */
3496 else if (os.conn >= C_SYNC_SOURCE &&
3497 peer_state.conn == C_CONNECTED) {
3498 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3499 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003500 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003501 }
3502 }
3503
3504 /* peer says his disk is inconsistent, while we think it is uptodate,
3505 * and this happens while the peer still thinks we have a sync going on,
3506 * but we think we are already done with the sync.
3507 * We ignore this to avoid flapping pdsk.
3508 * This should not happen, if the peer is a recent version of drbd. */
3509 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3510 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3511 real_peer_disk = D_UP_TO_DATE;
3512
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003513 if (ns.conn == C_WF_REPORT_PARAMS)
3514 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003515
Philipp Reisner67531712010-10-27 12:21:30 +02003516 if (peer_state.conn == C_AHEAD)
3517 ns.conn = C_BEHIND;
3518
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3520 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3521 int cr; /* consider resync */
3522
3523 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003524 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525 /* if we had an established connection
3526 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003527 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003529 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003530 /* if we have both been inconsistent, and the peer has been
3531 * forced to be UpToDate with --overwrite-data */
3532 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3533 /* if we had been plain connected, and the admin requested to
3534 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003535 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536 (peer_state.conn >= C_STARTING_SYNC_S &&
3537 peer_state.conn <= C_WF_BITMAP_T));
3538
3539 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003540 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541
3542 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003543 if (ns.conn == C_MASK) {
3544 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003545 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003546 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547 } else if (peer_state.disk == D_NEGOTIATING) {
3548 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3549 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003550 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003551 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003552 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003553 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003554 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003555 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003556 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003557 }
3558 }
3559 }
3560
Philipp Reisner87eeee42011-01-19 14:16:30 +01003561 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003562 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563 goto retry;
3564 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003565 ns.peer = peer_state.role;
3566 ns.pdsk = real_peer_disk;
3567 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003568 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003570 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3571 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003572 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003573 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003574		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003575 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003576 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003577 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003578 drbd_uuid_new_current(mdev);
3579 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003580 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003581 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003582 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003583 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003585 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586
3587 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003588 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003589 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003590 }
3591
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003592 if (os.conn > C_WF_REPORT_PARAMS) {
3593 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003594 peer_state.disk != D_NEGOTIATING ) {
3595 /* we want resync, peer has not yet decided to sync... */
3596 /* Nowadays only used when forcing a node into primary role and
3597 setting its disk to UpToDate with that */
3598 drbd_send_uuids(mdev);
3599 drbd_send_state(mdev);
3600 }
3601 }
3602
Philipp Reisner89e58e72011-01-19 13:12:45 +01003603 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604
3605 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3606
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003607 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003608}
3609
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003610static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003611{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003612 struct drbd_conf *mdev;
3613 struct p_rs_uuid *p = tconn->data.rbuf;
3614
3615 mdev = vnr_to_mdev(tconn, pi->vnr);
3616 if (!mdev)
3617 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003618
3619 wait_event(mdev->misc_wait,
3620 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003621 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622 mdev->state.conn < C_CONNECTED ||
3623 mdev->state.disk < D_NEGOTIATING);
3624
3625 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3626
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627 /* Here the _drbd_uuid_ functions are right, current should
3628 _not_ be rotated into the history */
3629 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3630 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3631 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3632
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003633 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634 drbd_start_resync(mdev, C_SYNC_TARGET);
3635
3636 put_ldev(mdev);
3637 } else
3638 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3639
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003640 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641}
3642
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003643/**
3644 * receive_bitmap_plain
3645 *
3646 * Return 0 when done, 1 when another iteration is needed, and a negative error
3647 * code upon failure.
3648 */
3649static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003650receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003651 struct p_header *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003652{
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003653 unsigned long *buffer = (unsigned long *)h->payload;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3655 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003656 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003657
Philipp Reisner02918be2010-08-20 14:35:10 +02003658 if (want != data_size) {
3659 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003660 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 }
3662 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003663 return 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003664 err = drbd_recv_all(mdev->tconn, buffer, want);
3665 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003666 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667
3668 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3669
3670 c->word_offset += num_words;
3671 c->bit_offset = c->word_offset * BITS_PER_LONG;
3672 if (c->bit_offset > c->bm_bits)
3673 c->bit_offset = c->bm_bits;
3674
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003675 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676}
3677
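/* Layout of p_compressed_bm->encoding: bits 0-3 carry the bitmap encoding
 * code, bits 4-6 the number of pad bits at the end of the stream, and
 * bit 7 the initial toggle state of the RLE run (see the helpers below). */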
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003678static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
3679{
3680 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
3681}
3682
3683static int dcbp_get_start(struct p_compressed_bm *p)
3684{
3685 return (p->encoding & 0x80) != 0;
3686}
3687
3688static int dcbp_get_pad_bits(struct p_compressed_bm *p)
3689{
3690 return (p->encoding >> 4) & 0x7;
3691}
3692
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003693/**
3694 * recv_bm_rle_bits
3695 *
3696 * Return 0 when done, 1 when another iteration is needed, and a negative error
3697 * code upon failure.
3698 */
3699static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003700recv_bm_rle_bits(struct drbd_conf *mdev,
3701 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003702 struct bm_xfer_ctx *c,
3703 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003704{
3705 struct bitstream bs;
3706 u64 look_ahead;
3707 u64 rl;
3708 u64 tmp;
3709 unsigned long s = c->bit_offset;
3710 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003711 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712 int have;
3713 int bits;
3714
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003715 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716
3717 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3718 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003719 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720
3721 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3722 bits = vli_decode_bits(&rl, look_ahead);
3723 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003724 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003725
3726 if (toggle) {
3727			e = s + rl - 1;
3728 if (e >= c->bm_bits) {
3729 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003730 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731 }
3732 _drbd_bm_set_bits(mdev, s, e);
3733 }
3734
3735 if (have < bits) {
3736 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3737 have, bits, look_ahead,
3738 (unsigned int)(bs.cur.b - p->code),
3739 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003740 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 }
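		/* consume the run length just decoded and refill the 64 bit
		 * look-ahead window from the bit stream */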
3742 look_ahead >>= bits;
3743 have -= bits;
3744
3745 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3746 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003747 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 look_ahead |= tmp << have;
3749 have += bits;
3750 }
3751
3752 c->bit_offset = s;
3753 bm_xfer_ctx_bit_to_word_offset(c);
3754
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003755 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003756}
3757
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003758/**
3759 * decode_bitmap_c
3760 *
3761 * Return 0 when done, 1 when another iteration is needed, and a negative error
3762 * code upon failure.
3763 */
3764static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765decode_bitmap_c(struct drbd_conf *mdev,
3766 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003767 struct bm_xfer_ctx *c,
3768 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003770 if (dcbp_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003771 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772
3773 /* other variants had been implemented for evaluation,
3774 * but have been dropped as this one turned out to be "best"
3775 * during all our tests. */
3776
3777 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003778 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003779 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780}
3781
3782void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3783 const char *direction, struct bm_xfer_ctx *c)
3784{
3785 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003786 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3788 + c->bm_words * sizeof(long);
3789 unsigned total = c->bytes[0] + c->bytes[1];
3790 unsigned r;
3791
3792 /* total can not be zero. but just in case: */
3793 if (total == 0)
3794 return;
3795
3796 /* don't report if not compressed */
3797 if (total >= plain)
3798 return;
3799
3800 /* total < plain. check for overflow, still */
3801 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3802 : (1000 * total / plain);
3803
3804 if (r > 1000)
3805 r = 1000;
3806
3807 r = 1000 - r;
3808 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3809 "total %u; compression: %u.%u%%\n",
3810 direction,
3811 c->bytes[1], c->packets[1],
3812 c->bytes[0], c->packets[0],
3813 total, r/10, r % 10);
3814}
3815
3816/* Since we are processing the bitfield from lower addresses to higher,
3817 it does not matter whether we process it in 32 bit chunks or 64 bit
3818 chunks as long as it is little endian. (Understand it as a byte stream,
3819 beginning with the lowest byte...) If we used big endian
3820 we would need to process it from the highest address to the lowest,
3821 in order to be agnostic to the 32 vs 64 bits issue.
3822
3823 Returns 0 on success, a negative error code otherwise. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003824static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003826 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003828 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003829 struct p_header *h = tconn->data.rbuf;
3830
3831 mdev = vnr_to_mdev(tconn, pi->vnr);
3832 if (!mdev)
3833 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003835 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3836 /* you are supposed to send additional out-of-sync information
3837 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003838
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839 c = (struct bm_xfer_ctx) {
3840 .bm_bits = drbd_bm_bits(mdev),
3841 .bm_words = drbd_bm_words(mdev),
3842 };
3843
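	/* Receive bitmap packets (plain or RLE compressed) and merge them until
	 * the whole bitmap has been transferred; a positive return from the
	 * decoders means another packet is still expected. */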
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003844 for(;;) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003845 if (pi->cmd == P_BITMAP) {
3846 err = receive_bitmap_plain(mdev, pi->size, h, &c);
3847 } else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848 /* MAYBE: sanity check that we speak proto >= 90,
3849 * and the feature is enabled! */
3850 struct p_compressed_bm *p;
3851
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003852 if (pi->size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003854 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003855 goto out;
3856 }
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003857
3858 p = mdev->tconn->data.rbuf;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003859 err = drbd_recv_all(mdev->tconn, p->head.payload, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003860 if (err)
3861 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003862 if (pi->size <= (sizeof(*p) - sizeof(p->head))) {
3863 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003864 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003865 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003866 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003867 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003868 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003869 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003870 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871 goto out;
3872 }
3873
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003874 c.packets[pi->cmd == P_BITMAP]++;
3875 c.bytes[pi->cmd == P_BITMAP] += sizeof(struct p_header) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003877 if (err <= 0) {
3878 if (err < 0)
3879 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003880 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003881 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003882 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003883 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003884 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003885 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886
3887 INFO_bm_xfer_stats(mdev, "receive", &c);
3888
3889 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003890 enum drbd_state_rv rv;
3891
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003892 err = drbd_send_bitmap(mdev);
3893 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003894 goto out;
3895 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003896 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3897 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003898 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3899 /* admin may have requested C_DISCONNECTING,
3900 * other threads may have noticed network errors */
3901 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3902 drbd_conn_str(mdev->state.conn));
3903 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003904 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003907 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003908 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003910 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911}
3912
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003913static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003914{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003915 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003916 pi->cmd, pi->size);
Philipp Reisner2de876e2011-03-15 14:38:01 +01003917
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003918 return ignore_remaining_packet(tconn, pi);
Philipp Reisner2de876e2011-03-15 14:38:01 +01003919}
3920
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003921static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003922{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923 /* Make sure we've acked all the TCP data associated
3924 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003925 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003927 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928}
3929
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003930static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003931{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003932 struct drbd_conf *mdev;
3933 struct p_block_desc *p = tconn->data.rbuf;
3934
3935 mdev = vnr_to_mdev(tconn, pi->vnr);
3936 if (!mdev)
3937 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003938
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003939 switch (mdev->state.conn) {
3940 case C_WF_SYNC_UUID:
3941 case C_WF_BITMAP_T:
3942 case C_BEHIND:
3943 break;
3944 default:
3945 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3946 drbd_conn_str(mdev->state.conn));
3947 }
3948
Philipp Reisner73a01a12010-10-27 14:33:00 +02003949 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3950
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003951 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003952}
3953
Philipp Reisner02918be2010-08-20 14:35:10 +02003954struct data_cmd {
3955 int expect_payload;
3956 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003957 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003958};
3959
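/* Per packet type: whether a variable-size payload may follow the fixed
 * part, the size of that fixed part, and the handler to dispatch to. */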
Philipp Reisner02918be2010-08-20 14:35:10 +02003960static struct data_cmd drbd_cmd_handler[] = {
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003961 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3962 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3963 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3964 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
3965 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3966 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3967 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
3968 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3969 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3970 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3971 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
3972 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3973 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3974 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3975 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3976 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3977 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3978 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3979 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3980 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3981 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3982 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
3983 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner02918be2010-08-20 14:35:10 +02003984};
3985
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003986static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01003988 struct p_header *header = tconn->data.rbuf;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003989 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003990 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003991 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003992
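	/* Main receive loop: read one packet header, pull in the fixed-size sub
	 * header (if any), and dispatch to the handler from drbd_cmd_handler[].
	 * Any failure tears the connection down via C_PROTOCOL_ERROR. */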
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003993 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01003994 struct data_cmd *cmd;
3995
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003996 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01003997 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003998 goto err_out;
3999
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004000 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004001 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004002 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004003 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004004 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004006 shs = cmd->pkt_size - sizeof(struct p_header);
4007 if (pi.size - shs > 0 && !cmd->expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004008 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004009 goto err_out;
4010 }
4011
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004012 if (shs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004013 err = drbd_recv_all_warn(tconn, &header->payload, shs);
4014 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004015 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004016 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004017 }
4018
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004019 err = cmd->fn(tconn, &pi);
4020 if (err) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004021 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004022 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004023 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004026 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004028 err_out:
4029 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030}
4031
Philipp Reisner0e29d162011-02-18 14:23:11 +01004032void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004033{
4034 struct drbd_wq_barrier barr;
4035
4036 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004037 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004038 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004039 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004040 wait_for_completion(&barr.done);
4041}
4042
Philipp Reisner360cc742011-02-08 14:29:53 +01004043static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004045 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004048 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004049 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050
4051 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004052 drbd_thread_stop(&tconn->asender);
4053 drbd_free_sock(tconn);
4054
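	/* per volume cleanup: drbd_disconnected() below is run for each volume */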
4055 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +01004056 conn_info(tconn, "Connection closed\n");
4057
Philipp Reisnercb703452011-03-24 11:03:07 +01004058 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4059 conn_try_outdate_peer_async(tconn);
4060
Philipp Reisner360cc742011-02-08 14:29:53 +01004061 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004062 oc = tconn->cstate;
4063 if (oc >= C_UNCONNECTED)
4064 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4065
Philipp Reisner360cc742011-02-08 14:29:53 +01004066 spin_unlock_irq(&tconn->req_lock);
4067
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004068 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01004069 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
4070
4071 crypto_free_hash(tconn->cram_hmac_tfm);
4072 tconn->cram_hmac_tfm = NULL;
4073
4074 kfree(tconn->net_conf);
4075 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004076 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004077 }
4078}
4079
4080static int drbd_disconnected(int vnr, void *p, void *data)
4081{
4082 struct drbd_conf *mdev = (struct drbd_conf *)p;
4083 enum drbd_fencing_p fp;
4084 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085
Philipp Reisner85719572010-07-21 10:20:17 +02004086 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004087 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004088 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4089 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4090 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004091 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092
4093 /* We do not have data structures that would allow us to
4094 * get the rs_pending_cnt down to 0 again.
4095 * * On C_SYNC_TARGET we do not have any data structures describing
4096 * the pending RSDataRequest's we have sent.
4097 * * On C_SYNC_SOURCE there is no data structure that tracks
4098 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4099 * And no, it is not the sum of the reference counts in the
4100 * resync_LRU. The resync_LRU tracks the whole operation including
4101 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4102 * on the fly. */
4103 drbd_rs_cancel_all(mdev);
4104 mdev->rs_total = 0;
4105 mdev->rs_failed = 0;
4106 atomic_set(&mdev->rs_pending_cnt, 0);
4107 wake_up(&mdev->misc_wait);
4108
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004109 del_timer(&mdev->request_timer);
4110
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112 resync_timer_fn((unsigned long)mdev);
4113
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4115 * w_make_resync_request etc. which may still be on the worker queue
4116 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004117 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118
4119 /* This also does reclaim_net_ee(). If we do this too early, we might
4120 * miss some resync ee and pages.*/
4121 drbd_process_done_ee(mdev);
4122
4123 kfree(mdev->p_uuid);
4124 mdev->p_uuid = NULL;
4125
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004126 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004127 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004128
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 drbd_md_sync(mdev);
4130
4131 fp = FP_DONT_CARE;
4132 if (get_ldev(mdev)) {
4133 fp = mdev->ldev->dc.fencing;
4134 put_ldev(mdev);
4135 }
4136
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004137 /* serialize with bitmap writeout triggered by the state change,
4138 * if any. */
4139 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4140
Philipp Reisnerb411b362009-09-25 16:07:19 -07004141 /* tcp_close and release of sendpage pages can be deferred. I don't
4142 * want to use SO_LINGER, because apparently it can be deferred for
4143 * more than 20 seconds (longest time I checked).
4144 *
4145	 * Actually we don't care exactly when the network stack does its
4146 * put_page(), but release our reference on these pages right here.
4147 */
4148 i = drbd_release_ee(mdev, &mdev->net_ee);
4149 if (i)
4150 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004151 i = atomic_read(&mdev->pp_in_use_by_net);
4152 if (i)
4153 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004154 i = atomic_read(&mdev->pp_in_use);
4155 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004156 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004157
4158 D_ASSERT(list_empty(&mdev->read_ee));
4159 D_ASSERT(list_empty(&mdev->active_ee));
4160 D_ASSERT(list_empty(&mdev->sync_ee));
4161 D_ASSERT(list_empty(&mdev->done_ee));
4162
4163 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4164 atomic_set(&mdev->current_epoch->epoch_size, 0);
4165 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004166
4167 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168}
4169
4170/*
4171 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4172 * we can agree on is stored in agreed_pro_version.
4173 *
4174 * feature flags and the reserved array should be enough room for future
4175 * enhancements of the handshake protocol, and possible plugins...
4176 *
4177 * for now, they are expected to be zero, but ignored.
4178 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004179static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004181 /* ASSERT current == mdev->tconn->receiver ... */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004182 struct p_connection_features *p = tconn->data.sbuf;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004183 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004184
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004185 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4186 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004187 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188 }
4189
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004190 if (tconn->data.socket == NULL) {
4191 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004192 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193 }
4194
4195 memset(p, 0, sizeof(*p));
4196 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4197 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004198 err = _conn_send_cmd(tconn, 0, &tconn->data, P_CONNECTION_FEATURES,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004199 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004200 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004201 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004202}
4203
4204/*
4205 * return values:
4206 * 1 yes, we have a valid connection
4207 * 0 oops, did not work out, please try again
4208 * -1 peer talks different language,
4209 * no point in trying again, please go standalone.
4210 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004211static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004213 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004214 struct p_connection_features *p = tconn->data.rbuf;
4215 const int expect = sizeof(struct p_connection_features) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004216 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004217 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004218
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004219 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004220 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 return 0;
4222
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004223 err = drbd_recv_header(tconn, &pi);
4224 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004225 return 0;
4226
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004227 if (pi.cmd != P_CONNECTION_FEATURES) {
4228 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004229 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004230 return -1;
4231 }
4232
Philipp Reisner77351055b2011-02-07 17:24:26 +01004233 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004234 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004235 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236 return -1;
4237 }
4238
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004239 err = drbd_recv_all_warn(tconn, &p->head.payload, expect);
4240 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 p->protocol_min = be32_to_cpu(p->protocol_min);
4244 p->protocol_max = be32_to_cpu(p->protocol_max);
4245 if (p->protocol_max == 0)
4246 p->protocol_max = p->protocol_min;
4247
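	/* we can talk to the peer if the two supported version ranges overlap */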
4248 if (PRO_VERSION_MAX < p->protocol_min ||
4249 PRO_VERSION_MIN > p->protocol_max)
4250 goto incompat;
4251
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004252 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004253
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004254 conn_info(tconn, "Handshake successful: "
4255 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256
4257 return 1;
4258
4259 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004260 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004261 "I support %d-%d, peer supports %d-%d\n",
4262 PRO_VERSION_MIN, PRO_VERSION_MAX,
4263 p->protocol_min, p->protocol_max);
4264 return -1;
4265}
4266
4267#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004268static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269{
4270	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4271	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004272 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004273}
4274#else
4275#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004276
4277/* Return value:
4278 1 - auth succeeded,
4279 0 - failed, try again (network error),
4280 -1 - auth failed, don't try again.
4281*/
4282
Philipp Reisner13e60372011-02-08 09:54:40 +01004283static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004284{
4285 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4286 struct scatterlist sg;
4287 char *response = NULL;
4288 char *right_response = NULL;
4289 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004290 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004291 unsigned int resp_size;
4292 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004293 struct packet_info pi;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004294 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004295
Philipp Reisner13e60372011-02-08 09:54:40 +01004296 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 desc.flags = 0;
4298
Philipp Reisner13e60372011-02-08 09:54:40 +01004299 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4300 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004302 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004303 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304 goto fail;
4305 }
4306
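	/* Challenge-response: send our random challenge, HMAC the peer's
	 * challenge with the shared secret, and verify the peer's answer
	 * against the HMAC of our own challenge. */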
4307 get_random_bytes(my_challenge, CHALLENGE_LEN);
4308
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004309 rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310 if (!rv)
4311 goto fail;
4312
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004313 err = drbd_recv_header(tconn, &pi);
4314 if (err) {
4315 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004317 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318
Philipp Reisner77351055b2011-02-07 17:24:26 +01004319 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004320 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004321 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322 rv = 0;
4323 goto fail;
4324 }
4325
Philipp Reisner77351055b2011-02-07 17:24:26 +01004326 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004327 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004328 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 goto fail;
4330 }
4331
Philipp Reisner77351055b2011-02-07 17:24:26 +01004332 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004334 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004335 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336 goto fail;
4337 }
4338
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004339 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4340 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341 rv = 0;
4342 goto fail;
4343 }
4344
Philipp Reisner13e60372011-02-08 09:54:40 +01004345 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004346 response = kmalloc(resp_size, GFP_NOIO);
4347 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004348 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004349 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004350 goto fail;
4351 }
4352
4353 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004354 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355
4356 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4357 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004358 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004359 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 goto fail;
4361 }
4362
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004363 rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 if (!rv)
4365 goto fail;
4366
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004367 err = drbd_recv_header(tconn, &pi);
4368 if (err) {
4369 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004371 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372
Philipp Reisner77351055b2011-02-07 17:24:26 +01004373 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004374 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004375 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376 rv = 0;
4377 goto fail;
4378 }
4379
Philipp Reisner77351055b2011-02-07 17:24:26 +01004380 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004381 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 rv = 0;
4383 goto fail;
4384 }
4385
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004386	err = drbd_recv_all_warn(tconn, response, resp_size);
4387 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004388 rv = 0;
4389 goto fail;
4390 }
4391
4392 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004393 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004394 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004395 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004396 goto fail;
4397 }
4398
4399 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4400
4401 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4402 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004403 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004404 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004405 goto fail;
4406 }
4407
4408 rv = !memcmp(response, right_response, resp_size);
4409
4410 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004411 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4412 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004413 else
4414 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415
4416 fail:
4417 kfree(peers_ch);
4418 kfree(response);
4419 kfree(right_response);
4420
4421 return rv;
4422}
4423#endif
4424
4425int drbdd_init(struct drbd_thread *thi)
4426{
Philipp Reisner392c8802011-02-09 10:33:31 +01004427 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004428 int h;
4429
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004430 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431
4432 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004433 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004434 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004435 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004436 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004437 }
4438 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004439 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004440 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441 }
4442 } while (h == 0);
4443
4444 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004445 if (get_net_conf(tconn)) {
4446 drbdd(tconn);
4447 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004448 }
4449 }
4450
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004451 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004453 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004454 return 0;
4455}
4456
4457/* ********* acknowledge sender ******** */
4458
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004459static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004460{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01004461 struct p_req_state_reply *p = tconn->meta.rbuf;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004462 int retcode = be32_to_cpu(p->retcode);
4463
4464 if (retcode >= SS_SUCCESS) {
4465 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4466 } else {
4467 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4468 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4469 drbd_set_st_err_str(retcode), retcode);
4470 }
4471 wake_up(&tconn->ping_wait);
4472
4473 return true;
4474}
4475
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004476static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004477{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004478 struct drbd_conf *mdev;
4479 struct p_req_state_reply *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480 int retcode = be32_to_cpu(p->retcode);
4481
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004482 mdev = vnr_to_mdev(tconn, pi->vnr);
4483 if (!mdev)
4484 return false;
4485
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004486 if (retcode >= SS_SUCCESS) {
4487 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4488 } else {
4489 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4490 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4491 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004492 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004493 wake_up(&mdev->state_wait);
4494
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004495 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496}
4497
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004498static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499{
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004500 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004501
4502}
4503
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004504static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004505{
4506 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004507 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4508 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4509 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004510
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004511 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512}
4513
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004514static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004515{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004516 struct drbd_conf *mdev;
4517 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518 sector_t sector = be64_to_cpu(p->sector);
4519 int blksize = be32_to_cpu(p->blksize);
4520
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004521 mdev = vnr_to_mdev(tconn, pi->vnr);
4522 if (!mdev)
4523 return false;
4524
Philipp Reisner31890f42011-01-19 14:12:51 +01004525 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526
4527 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4528
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004529 if (get_ldev(mdev)) {
4530 drbd_rs_complete_io(mdev, sector);
4531 drbd_set_in_sync(mdev, sector, blksize);
4532 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4533 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4534 put_ldev(mdev);
4535 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004537 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004538
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004539 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540}
4541
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004542static int
4543validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4544 struct rb_root *root, const char *func,
4545 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004546{
4547 struct drbd_request *req;
4548 struct bio_and_error m;
4549
Philipp Reisner87eeee42011-01-19 14:16:30 +01004550 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004551 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004553 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004554 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 }
4556 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004557 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004558
4559 if (m.bio)
4560 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004561 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004562}
4563
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004564static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004565{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004566 struct drbd_conf *mdev;
4567 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004568 sector_t sector = be64_to_cpu(p->sector);
4569 int blksize = be32_to_cpu(p->blksize);
4570 enum drbd_req_event what;
4571
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004572 mdev = vnr_to_mdev(tconn, pi->vnr);
4573 if (!mdev)
4574 return false;
4575
Philipp Reisnerb411b362009-09-25 16:07:19 -07004576 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4577
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004578 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004579 drbd_set_in_sync(mdev, sector, blksize);
4580 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004581 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582 }
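	/* Map the ack packet type onto the request event to apply; the
	 * assertions document which wire protocol each ack is expected under. */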
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004583 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004584 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004585 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004586 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 break;
4588 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004589 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004590 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004591 break;
4592 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004593 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004594 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004596 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004597 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004598 what = DISCARD_WRITE;
4599 break;
4600 case P_RETRY_WRITE:
4601 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4602 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004603 break;
4604 default:
4605 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004606 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004607 }
4608
4609 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004610 &mdev->write_requests, __func__,
4611 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004612}
4613
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004614static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004615{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004616 struct drbd_conf *mdev;
4617 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004619 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004620 bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4621 tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004622 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004623
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004624 mdev = vnr_to_mdev(tconn, pi->vnr);
4625 if (!mdev)
4626 return false;
4627
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4629
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004630 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004631 dec_rs_pending(mdev);
4632 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004633 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004635
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004636 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004637 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004638 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004639 if (!found) {
4640 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4641 The master bio might already be completed, therefore the
4642 request is no longer in the collision hash. */
4643 /* In Protocol B we might already have got a P_RECV_ACK
4644 but then get a P_NEG_ACK afterwards. */
4645 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004646 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004647 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004648 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004649 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650}
4651
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004652static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004654 struct drbd_conf *mdev;
4655 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004656 sector_t sector = be64_to_cpu(p->sector);
4657
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004658 mdev = vnr_to_mdev(tconn, pi->vnr);
4659 if (!mdev)
4660 return false;
4661
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004663
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4665 (unsigned long long)sector, be32_to_cpu(p->blksize));
4666
4667 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004668 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004669 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004670}
4671
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004672static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004673{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004674 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004675 sector_t sector;
4676 int size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004677 struct p_block_ack *p = tconn->meta.rbuf;
4678
4679 mdev = vnr_to_mdev(tconn, pi->vnr);
4680 if (!mdev)
4681 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004682
4683 sector = be64_to_cpu(p->sector);
4684 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685
4686 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4687
4688 dec_rs_pending(mdev);
4689
4690 if (get_ldev_if_state(mdev, D_FAILED)) {
4691 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004692 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004693 case P_NEG_RS_DREPLY:
4694 drbd_rs_failed_io(mdev, sector, size);
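			/* fall through: both cases end here; only P_NEG_RS_DREPLY
			 * marks the block as failed */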
4695 case P_RS_CANCEL:
4696 break;
4697 default:
4698 D_ASSERT(0);
4699 put_ldev(mdev);
4700 return false;
4701 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702 put_ldev(mdev);
4703 }
4704
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004705 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004706}
4707
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004708static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004710 struct drbd_conf *mdev;
4711 struct p_barrier_ack *p = tconn->meta.rbuf;
4712
4713 mdev = vnr_to_mdev(tconn, pi->vnr);
4714 if (!mdev)
4715 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004716
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004717 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004718
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004719 if (mdev->state.conn == C_AHEAD &&
4720 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004721 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4722 mdev->start_resync_timer.expires = jiffies + HZ;
4723 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004724 }
4725
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004726 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727}
4728
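/*
 * One online-verify result from the peer. Record the block as out of sync
 * (or print and flush the current out-of-sync range), account the progress
 * in ov_left, and once the whole run is done queue w_ov_finished; if the
 * work item cannot be allocated, finish synchronously.
 */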
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004729static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004731 struct drbd_conf *mdev;
4732 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004733 struct drbd_work *w;
4734 sector_t sector;
4735 int size;
4736
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004737 mdev = vnr_to_mdev(tconn, pi->vnr);
4738 if (!mdev)
4739 return false;
4740
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741 sector = be64_to_cpu(p->sector);
4742 size = be32_to_cpu(p->blksize);
4743
4744 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4745
4746 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004747 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004748 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004749 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004750
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004751 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004752 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004753
Philipp Reisnerb411b362009-09-25 16:07:19 -07004754 drbd_rs_complete_io(mdev, sector);
4755 dec_rs_pending(mdev);
4756
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004757 --mdev->ov_left;
4758
4759 /* let's advance progress step marks only for every other megabyte */
4760 if ((mdev->ov_left & 0x200) == 0x200)
4761 drbd_advance_rs_marks(mdev, mdev->ov_left);
4762
4763 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004764 w = kmalloc(sizeof(*w), GFP_NOIO);
4765 if (w) {
4766 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004767 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004768 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004769 } else {
4770				dev_err(DEV, "kmalloc(w) failed.\n");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004771 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004772 drbd_resync_finished(mdev);
4773 }
4774 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004775 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004776 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777}
4778
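/* Nothing to do for this packet; just consume it (used for P_DELAY_PROBE). */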
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004779static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004780{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004781 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004782}
4783
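/*
 * Process the done_ee list of every volume of this connection. Repeats until
 * all done_ee lists have been seen empty (checked under req_lock), so that
 * entries completed in the meantime are not missed.
 * Returns 1 if drbd_process_done_ee() failed for any volume, 0 otherwise.
 */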
Philipp Reisner32862ec2011-02-08 16:41:01 +01004784static int tconn_process_done_ee(struct drbd_tconn *tconn)
4785{
Philipp Reisner082a3432011-03-15 16:05:42 +01004786 struct drbd_conf *mdev;
4787 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004788
4789 do {
4790 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4791 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004792 idr_for_each_entry(&tconn->volumes, mdev, i) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +01004793 if (drbd_process_done_ee(mdev))
Philipp Reisner082a3432011-03-15 16:05:42 +01004794 return 1; /* error */
4795 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004796 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004797
4798 spin_lock_irq(&tconn->req_lock);
4799 idr_for_each_entry(&tconn->volumes, mdev, i) {
4800 not_empty = !list_empty(&mdev->done_ee);
4801 if (not_empty)
4802 break;
4803 }
4804 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004805 } while (not_empty);
4806
4807 return 0;
4808}
4809
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004810struct asender_cmd {
4811 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004812 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004813};
4814
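/*
 * Dispatch table for packets arriving on the meta-data socket, indexed by
 * packet type. pkt_size is the full packet size including the header; a
 * packet type without a handler here is treated as a protocol error.
 */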
4815static struct asender_cmd asender_tbl[] = {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004816 [P_PING] = { sizeof(struct p_header), got_Ping },
4817 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
4818 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4819 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4820 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4821 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
4822 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4823 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4824 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
4825 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4826 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4827 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4828 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4829 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4830 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
4831	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
4832 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004833};
4834
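/*
 * The asender thread services the meta-data socket of a connection: it sends
 * pings on request, processes the done_ee lists (sending the corresponding
 * acks), and receives and dispatches the peer's ack-class packets through
 * asender_tbl.
 */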
Philipp Reisnerb411b362009-09-25 16:07:19 -07004835int drbd_asender(struct drbd_thread *thi)
4836{
Philipp Reisner392c8802011-02-09 10:33:31 +01004837 struct drbd_tconn *tconn = thi->tconn;
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01004838 struct p_header *h = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004840 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004841 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004842 void *buf = h;
4843 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004844 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004845 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004846
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847 current->policy = SCHED_RR; /* Make this a realtime task! */
4848 current->rt_priority = 2; /* more important than all other tasks */
4849
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004850 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004851 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004852 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004853 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004854 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004855 goto reconnect;
4856 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004857 tconn->meta.socket->sk->sk_rcvtimeo =
4858 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004859 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860 }
4861
Philipp Reisner32862ec2011-02-08 16:41:01 +01004862 /* TODO: conditionally cork; it may hurt latency if we cork without
4863 much to send */
4864 if (!tconn->net_conf->no_cork)
4865 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004866 if (tconn_process_done_ee(tconn)) {
4867 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004868 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004869 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004870 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004871 if (!tconn->net_conf->no_cork)
4872 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004873
4874 /* short circuit, recv_msg would return EINTR anyways. */
4875 if (signal_pending(current))
4876 continue;
4877
Philipp Reisner32862ec2011-02-08 16:41:01 +01004878 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4879 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004880
4881 flush_signals(current);
4882
4883 /* Note:
4884 * -EINTR (on meta) we got a signal
4885 * -EAGAIN (on meta) rcvtimeo expired
4886 * -ECONNRESET other side closed the connection
4887 * -ERESTARTSYS (on data) we got a signal
4888 * rv < 0 other than above: unexpected error!
4889 * rv == expected: full header or command
4890 * rv < expected: "woken" by signal during receive
4891 * rv == 0 : "connection shut down by peer"
4892 */
4893 if (likely(rv > 0)) {
4894 received += rv;
4895 buf += rv;
4896 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004897 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004898 goto reconnect;
4899 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004900 /* If the data socket received something meanwhile,
4901 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004902 if (time_after(tconn->last_received,
4903 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004904 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004905 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004906 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004907 goto reconnect;
4908 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004909 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004910 continue;
4911 } else if (rv == -EINTR) {
4912 continue;
4913 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004914 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915 goto reconnect;
4916 }
4917
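		/* Header complete: decode it and learn how many payload bytes to expect. */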
4918 if (received == expect && cmd == NULL) {
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01004919 if (decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004920 goto reconnect;
4921			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
4922				conn_err(tconn, "unknown command %d on meta (l: %d)\n",
4923					pi.cmd, pi.size);
4924				goto disconnect;
4925			}
4926			cmd = &asender_tbl[pi.cmd];
4927			expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004928 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004929 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004930 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004931 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004932 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004933 }
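		/* Complete packet received: dispatch it to its handler. */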
4934 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004935 bool rv;
4936
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004937 rv = cmd->fn(tconn, &pi);
4938 if (!rv) {
4939 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004940 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004941 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004942
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004943 tconn->last_received = jiffies;
4944
Lars Ellenbergf36af182011-03-09 22:44:55 +01004945 /* the idle_timeout (ping-int)
4946 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004947 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004948 ping_timeout_active = 0;
4949
Philipp Reisnerb411b362009-09-25 16:07:19 -07004950 buf = h;
4951 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004952 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004953 cmd = NULL;
4954 }
4955 }
4956
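	/* Error exit paths, reached only via goto: */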
4957 if (0) {
4958reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004959 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004960 }
4961 if (0) {
4962disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004963 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004964 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004965 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004966
Philipp Reisner32862ec2011-02-08 16:41:01 +01004967 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004968
4969 return 0;
4970}