/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

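/* Decoded fields of a received packet header: the command, the payload
 * size in bytes, and the volume number the packet is addressed to. */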
struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

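/* Example: a chain p1 -> p2 -> p3 has page_private(p1) == (unsigned long)p2,
 * page_private(p2) == (unsigned long)p3, and page_private(p3) == 0; the
 * page_chain_next()/page_chain_for_each*() helpers used below walk such a
 * chain until they hit the 0 terminator. */
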
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

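/* Try to take @number pages off the global drbd_pp_pool in one go; if the
 * pool does not currently hold that many, try to allocate all of them
 * freshly with alloc_page(GFP_TRY).  Returns a page chain on success; on
 * failure returns NULL and puts any partially allocated pages back into
 * the pool. */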
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that has not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

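/* Allocate a peer request ("EE") describing @data_size bytes at @sector,
 * including the page chain that will hold the data.  May block in
 * drbd_pp_alloc() if __GFP_WAIT is set in @gfp_mask; returns NULL if the
 * allocation fails or a fault was injected. */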
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}


/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

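/* Wait until @head becomes empty.  The caller holds req_lock; it is dropped
 * around io_schedule() and re-acquired before returning. */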
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;

out:
	return err;
}

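/* Receive at most @size bytes in a single sock_recvmsg() call.
 * Returns the number of bytes received, or a negative error code;
 * unlike drbd_recv() below, a short read does not force the
 * connection down. */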
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

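/* Receive @size bytes from the data socket in one MSG_WAITALL call.
 * On error, EOF, or a short read the connection is forced into
 * C_BROKEN_PIPE; returns the number of bytes actually received. */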
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}

static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
				  (struct sockaddr *) tconn->net_conf->my_addr,
				  tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}

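/* The two "first packets" below are only used while the connection is being
 * brought up: whichever socket carried P_INITIAL_DATA becomes the data
 * socket, the one that carried P_INITIAL_META becomes the meta-data socket
 * (see drbd_connect() below). */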
static int drbd_send_fp(struct drbd_tconn *tconn, struct drbd_socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = tconn->data.sbuf;

	return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}

static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 h;
	int rr;

	rr = drbd_recv_short(sock, &h, sizeof(h), 0);

	if (rr == sizeof(h) && h.magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h.command);

	return 0xffff;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);

	/* Assume that the peer only understands protocol 80 until we know better. */
	tconn->agreed_pro_version = 80;

	do {
		struct socket *s;

		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!tconn->data.socket) {
				tconn->data.socket = s;
				drbd_send_fp(tconn, &tconn->data, P_INITIAL_DATA);
			} else if (!tconn->meta.socket) {
				tconn->meta.socket = s;
				drbd_send_fp(tconn, &tconn->meta, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (tconn->data.socket && tconn->meta.socket) {
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&tconn->data.socket);
			ok = drbd_socket_okay(&tconn->meta.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&tconn->data.socket);
			drbd_socket_okay(&tconn->meta.socket);
			switch (try) {
			case P_INITIAL_DATA:
				if (tconn->data.socket) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(tconn->data.socket);
				}
				tconn->data.socket = s;
				break;
			case P_INITIAL_META:
				if (tconn->meta.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(tconn->meta.socket);
				}
				tconn->meta.socket = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (tconn->data.socket && tconn->meta.socket) {
			ok = drbd_socket_okay(&tconn->data.socket);
			ok = drbd_socket_okay(&tconn->meta.socket) && ok;
			if (ok)
				break;
		}
	} while (1);

	sock  = tconn->data.socket;
	msock = tconn->meta.socket;

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->last_received = jiffies;

	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (tconn->data.socket) {
		sock_release(tconn->data.socket);
		tconn->data.socket = NULL;
	}
	if (tconn->meta.socket) {
		sock_release(tconn->meta.socket);
		tconn->meta.socket = NULL;
	}
	return -1;
}

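/* Decode an on-the-wire packet header into @pi.  Both the old fixed-size
 * header format (h80, DRBD_MAGIC) and the extended one (h95, DRBD_MAGIC_BIG)
 * are recognized by their magic number. */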
static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		pi->cmd = be16_to_cpu(h->h80.command);
		pi->size = be16_to_cpu(h->h80.length);
		pi->vnr = 0;
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		pi->cmd = be16_to_cpu(h->h95.command);
		pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
		pi->vnr = 0;
	} else {
		conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
			 be32_to_cpu(h->h80.magic),
			 be16_to_cpu(h->h80.command),
			 be16_to_cpu(h->h80.length));
		return -EINVAL;
	}
	return 0;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_header *h = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, h, sizeof(*h));
	if (err)
		return err;

	err = decode_header(tconn, h, pi);
	tconn->last_received = jiffies;

	return err;
}

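/* Issue a cache flush to the local backing device.  If the flush fails,
 * downgrade the write ordering method to WO_drain_io so we do not keep
 * issuing flushes that the lower device cannot honor. */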
static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}

/**
 * drbd_submit_peer_request()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&mdev->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

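/* A P_BARRIER from the peer closes the current write epoch.  Depending on
 * the configured write ordering we either just tag the epoch (WO_none), or
 * wait for all writes of the epoch to complete and flush/drain the backing
 * device before a new epoch is started; the P_BARRIER_ACK is sent once the
 * epoch has actually finished (see drbd_may_finish_epoch()). */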
static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	int rv;
	struct p_barrier *p = tconn->data.rbuf;
	struct drbd_epoch *epoch;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return 0;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return 0;
}

1309/* used from receive_RSDataReply (recv_resync_read)
1310 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001311static struct drbd_peer_request *
1312read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1313 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001314{
Lars Ellenberg66660322010-04-06 12:15:04 +02001315 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001316 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001317 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001318 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001319 void *dig_in = mdev->tconn->int_dig_in;
1320 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001321 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001322
Philipp Reisnera0638452011-01-19 14:31:32 +01001323 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1324 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001325
1326 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001327 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1328 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001329 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001330 }
1331
1332 data_size -= dgs;
1333
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001334 if (!expect(data_size != 0))
1335 return NULL;
1336 if (!expect(IS_ALIGNED(data_size, 512)))
1337 return NULL;
1338 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1339 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001340
Lars Ellenberg66660322010-04-06 12:15:04 +02001341 /* even though we trust our peer,
1342 * we sometimes have to double check. */
1343 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001344 dev_err(DEV, "request from peer beyond end of local disk: "
1345 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001346 (unsigned long long)capacity,
1347 (unsigned long long)sector, data_size);
1348 return NULL;
1349 }
1350
Philipp Reisnerb411b362009-09-25 16:07:19 -07001351 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1352 * "criss-cross" setup, that might cause write-out on some other DRBD,
1353 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001354 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1355 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001356 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001357
Philipp Reisnerb411b362009-09-25 16:07:19 -07001358 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001359 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001360 page_chain_for_each(page) {
1361 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001362 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001363 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001364 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001365 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1366 data[0] = data[0] ^ (unsigned long)-1;
1367 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001368 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001369 if (err) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001370 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001371 return NULL;
1372 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001373 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001374 }
1375
1376 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001377 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001379 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1380 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001381 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001382 return NULL;
1383 }
1384 }
1385 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001386 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387}
1388
1389/* drbd_drain_block() just takes a data block
1390 * out of the socket input buffer, and discards it.
1391 */
1392static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1393{
1394 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001395 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396 void *data;
1397
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001398 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001399 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001400
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001401 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402
1403 data = kmap(page);
1404 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001405 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1406
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001407 err = drbd_recv_all_warn(mdev->tconn, data, len);
1408 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001409 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001410 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001411 }
1412 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001413 drbd_pp_free(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001414 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001415}
1416
1417static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1418 sector_t sector, int data_size)
1419{
1420 struct bio_vec *bvec;
1421 struct bio *bio;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001422 int dgs, err, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001423 void *dig_in = mdev->tconn->int_dig_in;
1424 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001425
Philipp Reisnera0638452011-01-19 14:31:32 +01001426 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1427 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001428
1429 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001430 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1431 if (err)
1432 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001433 }
1434
1435 data_size -= dgs;
1436
1437 /* optimistically update recv_cnt. if receiving fails below,
1438 * we disconnect anyways, and counters will be reset. */
1439 mdev->recv_cnt += data_size>>9;
1440
1441 bio = req->master_bio;
1442 D_ASSERT(sector == bio->bi_sector);
1443
1444 bio_for_each_segment(bvec, bio, i) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001445 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001446 expect = min_t(int, data_size, bvec->bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001447 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001448 kunmap(bvec->bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001449 if (err)
1450 return err;
1451 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 }
1453
1454 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001455 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456 if (memcmp(dig_in, dig_vv, dgs)) {
1457 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001458 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459 }
1460 }
1461
1462 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001463 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464}
1465
1466/* e_end_resync_block() is called via
1467 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001468static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001470 struct drbd_peer_request *peer_req =
1471 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001472 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001473 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001474 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001478 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1479 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001480 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481 } else {
1482 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001483 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001485 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 }
1487 dec_unacked(mdev);
1488
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001489 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490}
1491
1492static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1493{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001494 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001496 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1497 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001498 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499
1500 dec_rs_pending(mdev);
1501
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502 inc_unacked(mdev);
1503 /* corresponding dec_unacked() in e_end_resync_block()
1504 * or in _drbd_clear_done_ee */
1505
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001506 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001507
Philipp Reisner87eeee42011-01-19 14:16:30 +01001508 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001509 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001510 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001512 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001513 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001514 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001516 /* don't care for the reason here */
1517 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001518 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001519 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001520 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001521
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001522 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001523fail:
1524 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001525 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526}
1527
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001528static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001529find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1530 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001531{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001532 struct drbd_request *req;
1533
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001534 /* Request object according to our peer */
1535 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001536 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001537 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001538 if (!missing_ok) {
1539 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1540 (unsigned long)id, (unsigned long long)sector);
1541 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001542 return NULL;
1543}
1544
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001545static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001547 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 struct drbd_request *req;
1549 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001550 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001551 struct p_data *p = tconn->data.rbuf;
1552
1553 mdev = vnr_to_mdev(tconn, pi->vnr);
1554 if (!mdev)
1555 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001556
1557 sector = be64_to_cpu(p->sector);
1558
Philipp Reisner87eeee42011-01-19 14:16:30 +01001559 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001560 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001561 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001562 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001563 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564
Bart Van Assche24c48302011-05-21 18:32:29 +02001565 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566 * special casing it there for the various failure cases.
1567 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001568 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001569 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001570 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571 /* else: nothing. handled from drbd_disconnect...
1572 * I don't think we may complete this just yet
1573 * in case we are "on-disconnect: freeze" */
1574
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001575 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576}
1577
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001578static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001580 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001582 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001583 struct p_data *p = tconn->data.rbuf;
1584
1585 mdev = vnr_to_mdev(tconn, pi->vnr);
1586 if (!mdev)
1587 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588
1589 sector = be64_to_cpu(p->sector);
1590 D_ASSERT(p->block_id == ID_SYNCER);
1591
1592 if (get_ldev(mdev)) {
1593 /* data is submitted to disk within recv_resync_read.
1594 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001595 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001596 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 } else {
1598 if (__ratelimit(&drbd_ratelimit_state))
1599 dev_err(DEV, "Can not write resync data to local disk.\n");
1600
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001601 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001603 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 }
1605
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001606 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001607
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001608 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609}
1610
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001611static int w_restart_write(struct drbd_work *w, int cancel)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001612{
1613 struct drbd_request *req = container_of(w, struct drbd_request, w);
1614 struct drbd_conf *mdev = w->mdev;
1615 struct bio *bio;
1616 unsigned long start_time;
1617 unsigned long flags;
1618
1619 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1620 if (!expect(req->rq_state & RQ_POSTPONED)) {
1621 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001622 return -EIO;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001623 }
1624 bio = req->master_bio;
1625 start_time = req->start_time;
1626 /* Postponed requests will not have their master_bio completed! */
1627 __req_mod(req, DISCARD_WRITE, NULL);
1628 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1629
1630 while (__drbd_make_request(mdev, bio, start_time))
1631 /* retry */ ;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001632 return 0;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001633}
1634
1635static void restart_conflicting_writes(struct drbd_conf *mdev,
1636 sector_t sector, int size)
1637{
1638 struct drbd_interval *i;
1639 struct drbd_request *req;
1640
1641 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1642 if (!i->local)
1643 continue;
1644 req = container_of(i, struct drbd_request, i);
1645 if (req->rq_state & RQ_LOCAL_PENDING ||
1646 !(req->rq_state & RQ_POSTPONED))
1647 continue;
1648 if (expect(list_empty(&req->w.list))) {
1649 req->w.mdev = mdev;
1650 req->w.cb = w_restart_write;
1651 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1652 }
1653 }
1654}
1655
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656/* e_end_block() is called via drbd_process_done_ee().
1657 * this means this function only runs in the asender thread
1658 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001659static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001661 struct drbd_peer_request *peer_req =
1662 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001663 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001664 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001665 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666
Philipp Reisner89e58e72011-01-19 13:12:45 +01001667 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001668 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1670 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001671 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001673 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001675 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001677 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678 /* we expect it to be marked out of sync anyways...
1679 * maybe assert this? */
1680 }
1681 dec_unacked(mdev);
1682 }
1683 /* we delete from the conflict detection hash _after_ we sent out the
1684 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001685 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001686 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001687 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1688 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001689 if (peer_req->flags & EE_RESTART_REQUESTS)
1690 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001691 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001692 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001693 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001694
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001695 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001696
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001697 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698}
1699
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001700static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001702 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001703 struct drbd_peer_request *peer_req =
1704 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001705 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001707 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001708 dec_unacked(mdev);
1709
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001710 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001711}
1712
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001713static int e_send_discard_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001714{
1715 return e_send_ack(w, P_DISCARD_WRITE);
1716}
1717
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001718static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001719{
1720 struct drbd_tconn *tconn = w->mdev->tconn;
1721
1722 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1723 P_RETRY_WRITE : P_DISCARD_WRITE);
1724}
1725
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001726static bool seq_greater(u32 a, u32 b)
1727{
1728 /*
1729 * We assume 32-bit wrap-around here.
1730 * For 24-bit wrap-around, we would have to shift:
1731 * a <<= 8; b <<= 8;
1732 */
1733 return (s32)a - (s32)b > 0;
1734}
1735
1736static u32 seq_max(u32 a, u32 b)
1737{
1738 return seq_greater(a, b) ? a : b;
1739}
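/*
 * Illustrative sketch only, kept out of the build with #if 0: it merely
 * demonstrates how the signed subtraction in seq_greater() above copes
 * with a peer sequence number that wraps around 2^32.  The values below
 * are invented for the example.
 */
#if 0
static void seq_wraparound_example(void)
{
	u32 older = 0xfffffffe;	/* shortly before the counter wraps */
	u32 newer = 2;		/* four packets later, past the wrap */

	/* (s32)2 - (s32)0xfffffffe == 2 - (-2) == 4 > 0: "newer" wins,
	 * even though newer < older when compared as unsigned values. */
	BUG_ON(!seq_greater(newer, older));
	BUG_ON(seq_max(newer, older) != newer);
}
#endif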
1740
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001741static bool need_peer_seq(struct drbd_conf *mdev)
1742{
1743 struct drbd_tconn *tconn = mdev->tconn;
1744
1745 /*
1746 * We only need to keep track of the last packet_seq number of our peer
1747 * if we are in dual-primary mode and we have the discard flag set; see
1748 * handle_write_conflicts().
1749 */
1750 return tconn->net_conf->two_primaries &&
1751 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1752}
1753
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001754static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001755{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001756 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001757
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001758 if (need_peer_seq(mdev)) {
1759 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001760 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1761 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001762 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001763 /* wake up only if we actually changed mdev->peer_seq */
1764 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001765 wake_up(&mdev->seq_wait);
1766 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001767}
1768
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769/* Called from receive_Data.
1770 * Synchronize packets on sock with packets on msock.
1771 *
1772 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1773 * packet traveling on msock, they are still processed in the order they have
1774 * been sent.
1775 *
1776 * Note: we don't care for Ack packets overtaking P_DATA packets.
1777 *
1778 * In case packet_seq is larger than mdev->peer_seq number, there are
1779 * outstanding packets on the msock. We wait for them to arrive.
1780 * In case we are the logically next packet, we update mdev->peer_seq
1781 * ourselves. Correctly handles 32bit wrap around.
1782 *
1783 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1784 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1785 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1786 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1787 *
1788 * returns 0 if we may process the packet,
1789 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001790static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001791{
1792 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001794 int ret;
1795
1796 if (!need_peer_seq(mdev))
1797 return 0;
1798
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 spin_lock(&mdev->peer_seq_lock);
1800 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001801 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1802 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1803 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001805 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 if (signal_pending(current)) {
1807 ret = -ERESTARTSYS;
1808 break;
1809 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001810 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001812 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1813 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001814 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001815 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001817 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001818 break;
1819 }
1820 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001821 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001822 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823 return ret;
1824}
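/*
 * Illustrative timeline (numbers invented): assume the last ack seen on
 * msock advanced mdev->peer_seq to 41.  A P_DATA packet carrying seq_num 43
 * on the data socket is not the logically next packet, so
 * wait_for_and_update_peer_seq() sleeps until the ack carrying 42 has been
 * processed (or ping_timeo expires), which keeps writes applied in the
 * order the peer sent them.
 */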
1825
Lars Ellenberg688593c2010-11-17 22:25:03 +01001826/* see also bio_flags_to_wire()
1827 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1828 * flags and back. We may replicate to other kernel versions. */
1829static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001830{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001831 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1832 (dpf & DP_FUA ? REQ_FUA : 0) |
1833 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1834 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001835}
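/*
 * Minimal usage sketch, not compiled (#if 0), of the round trip described
 * in the comment above: the peer encodes its bio flags into dp_flags with
 * bio_flags_to_wire(), and this side rebuilds equivalent REQ_* bits before
 * resubmitting the write locally.  The flag values here are invented.
 */
#if 0
static void wire_flags_example(struct drbd_conf *mdev)
{
	u32 dp_flags = DP_FUA | DP_FLUSH;	/* as decoded from p->dp_flags */
	int rw = WRITE | wire_flags_to_bio(mdev, dp_flags);

	/* rw now carries REQ_FUA and REQ_FLUSH again for the local submit */
}
#endif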
1836
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001837static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1838 unsigned int size)
1839{
1840 struct drbd_interval *i;
1841
1842 repeat:
1843 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1844 struct drbd_request *req;
1845 struct bio_and_error m;
1846
1847 if (!i->local)
1848 continue;
1849 req = container_of(i, struct drbd_request, i);
1850 if (!(req->rq_state & RQ_POSTPONED))
1851 continue;
1852 req->rq_state &= ~RQ_POSTPONED;
1853 __req_mod(req, NEG_ACKED, &m);
1854 spin_unlock_irq(&mdev->tconn->req_lock);
1855 if (m.bio)
1856 complete_master_bio(mdev, &m);
1857 spin_lock_irq(&mdev->tconn->req_lock);
1858 goto repeat;
1859 }
1860}
1861
1862static int handle_write_conflicts(struct drbd_conf *mdev,
1863 struct drbd_peer_request *peer_req)
1864{
1865 struct drbd_tconn *tconn = mdev->tconn;
1866 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1867 sector_t sector = peer_req->i.sector;
1868 const unsigned int size = peer_req->i.size;
1869 struct drbd_interval *i;
1870 bool equal;
1871 int err;
1872
1873 /*
1874 * Inserting the peer request into the write_requests tree will prevent
1875 * new conflicting local requests from being added.
1876 */
1877 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1878
1879 repeat:
1880 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1881 if (i == &peer_req->i)
1882 continue;
1883
1884 if (!i->local) {
1885 /*
1886 * Our peer has sent a conflicting remote request; this
1887 * should not happen in a two-node setup. Wait for the
1888 * earlier peer request to complete.
1889 */
1890 err = drbd_wait_misc(mdev, i);
1891 if (err)
1892 goto out;
1893 goto repeat;
1894 }
1895
1896 equal = i->sector == sector && i->size == size;
1897 if (resolve_conflicts) {
1898 /*
1899 * If the peer request is fully contained within the
1900 * overlapping request, it can be discarded; otherwise,
1901 * it will be retried once all overlapping requests
1902 * have completed.
1903 */
1904 bool discard = i->sector <= sector && i->sector +
1905 (i->size >> 9) >= sector + (size >> 9);
1906
1907 if (!equal)
1908 dev_alert(DEV, "Concurrent writes detected: "
1909 "local=%llus +%u, remote=%llus +%u, "
1910 "assuming %s came first\n",
1911 (unsigned long long)i->sector, i->size,
1912 (unsigned long long)sector, size,
1913 discard ? "local" : "remote");
1914
1915 inc_unacked(mdev);
1916 peer_req->w.cb = discard ? e_send_discard_write :
1917 e_send_retry_write;
1918 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1919 wake_asender(mdev->tconn);
1920
1921 err = -ENOENT;
1922 goto out;
1923 } else {
1924 struct drbd_request *req =
1925 container_of(i, struct drbd_request, i);
1926
1927 if (!equal)
1928 dev_alert(DEV, "Concurrent writes detected: "
1929 "local=%llus +%u, remote=%llus +%u\n",
1930 (unsigned long long)i->sector, i->size,
1931 (unsigned long long)sector, size);
1932
1933 if (req->rq_state & RQ_LOCAL_PENDING ||
1934 !(req->rq_state & RQ_POSTPONED)) {
1935 /*
1936 * Wait for the node with the discard flag to
1937 * decide if this request will be discarded or
1938 * retried. Requests that are discarded will
1939 * disappear from the write_requests tree.
1940 *
1941 * In addition, wait for the conflicting
1942 * request to finish locally before submitting
1943 * the conflicting peer request.
1944 */
1945 err = drbd_wait_misc(mdev, &req->i);
1946 if (err) {
1947 _conn_request_state(mdev->tconn,
1948 NS(conn, C_TIMEOUT),
1949 CS_HARD);
1950 fail_postponed_requests(mdev, sector, size);
1951 goto out;
1952 }
1953 goto repeat;
1954 }
1955 /*
1956 * Remember to restart the conflicting requests after
1957 * the new peer request has completed.
1958 */
1959 peer_req->flags |= EE_RESTART_REQUESTS;
1960 }
1961 }
1962 err = 0;
1963
1964 out:
1965 if (err)
1966 drbd_remove_epoch_entry_interval(mdev, peer_req);
1967 return err;
1968}
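/*
 * Worked example for the containment test above (all numbers invented):
 * a local request covers sector 1000 with size 4096 (8 sectors, i.e.
 * sectors 1000..1007), and the conflicting peer request covers sector
 * 1002 with size 2048 (sectors 1002..1005).  Then
 *   i->sector (1000) <= sector (1002)  and
 *   i->sector + (i->size >> 9) (1008) >= sector + (size >> 9) (1006),
 * so the peer request is fully contained; on the node with the discard
 * flag it is answered with P_DISCARD_WRITE instead of being submitted.
 * A peer request that is not fully contained is answered with
 * P_RETRY_WRITE so the peer resubmits it later.
 */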
1969
Philipp Reisnerb411b362009-09-25 16:07:19 -07001970/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001971static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001973 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001975 struct drbd_peer_request *peer_req;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001976 struct p_data *p = tconn->data.rbuf;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001977 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001978 int rw = WRITE;
1979 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001980 int err;
1981
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001982 mdev = vnr_to_mdev(tconn, pi->vnr);
1983 if (!mdev)
1984 return -EIO;
1985
Philipp Reisnerb411b362009-09-25 16:07:19 -07001986 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001987 int err2;
1988
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001989 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001990 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001991 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001992 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001993 if (!err)
1994 err = err2;
1995 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001996 }
1997
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001998 /*
1999 * Corresponding put_ldev done either below (on various errors), or in
2000 * drbd_peer_request_endio, if we successfully submit the data at the
2001 * end of this function.
2002 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002003
2004 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002005 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002006 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002007 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002008 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009 }
2010
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002011 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012
Lars Ellenberg688593c2010-11-17 22:25:03 +01002013 dp_flags = be32_to_cpu(p->dp_flags);
2014 rw |= wire_flags_to_bio(mdev, dp_flags);
2015
2016 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002017 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002018
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002020 peer_req->epoch = mdev->current_epoch;
2021 atomic_inc(&peer_req->epoch->epoch_size);
2022 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 spin_unlock(&mdev->epoch_lock);
2024
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002025 if (mdev->tconn->net_conf->two_primaries) {
2026 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2027 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002029 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002030 err = handle_write_conflicts(mdev, peer_req);
2031 if (err) {
2032 spin_unlock_irq(&mdev->tconn->req_lock);
2033 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002034 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002035 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002036 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002037 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002038 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002039 } else
2040 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002041 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002042 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002043
Philipp Reisner89e58e72011-01-19 13:12:45 +01002044 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002045 case DRBD_PROT_C:
2046 inc_unacked(mdev);
2047 /* corresponding dec_unacked() in e_end_block()
2048 * or in _drbd_clear_done_ee */
2049 break;
2050 case DRBD_PROT_B:
2051 /* I really don't like it that the receiver thread
2052 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002053 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054 break;
2055 case DRBD_PROT_A:
2056 /* nothing to do */
2057 break;
2058 }
2059
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002060 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061 /* In case we have the only disk of the cluster, mark the block out of sync; it needs a resync once the peer has a disk again. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002062 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2063 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2064 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg181286a2011-03-31 15:18:56 +02002065 drbd_al_begin_io(mdev, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 }
2067
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002068 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2069 if (!err)
2070 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002072 /* don't care for the reason here */
2073 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002074 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002075 list_del(&peer_req->w.list);
2076 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002077 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002078 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002079 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002080
Philipp Reisnerb411b362009-09-25 16:07:19 -07002081out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002082 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002083 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002084 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002085 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002086}
2087
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002088/* We may throttle resync, if the lower device seems to be busy,
2089 * and current sync rate is above c_min_rate.
2090 *
2091 * To decide whether or not the lower device is busy, we use a scheme similar
2092 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2093 * (more than 64 sectors) of activity we cannot account for with our own resync
2094 * activity, it obviously is "busy".
2095 *
2096 * The current sync rate used here uses only the most recent two step marks,
2097 * to have a short time average so we can react faster.
2098 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002099int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002100{
2101 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2102 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002103 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002104 int curr_events;
2105 int throttle = 0;
2106
2107 /* feature disabled? */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002108 if (mdev->ldev->dc.c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002109 return 0;
2110
Philipp Reisnere3555d82010-11-07 15:56:29 +01002111 spin_lock_irq(&mdev->al_lock);
2112 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2113 if (tmp) {
2114 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2115 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2116 spin_unlock_irq(&mdev->al_lock);
2117 return 0;
2118 }
2119 /* Do not slow down if app IO is already waiting for this extent */
2120 }
2121 spin_unlock_irq(&mdev->al_lock);
2122
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002123 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2124 (int)part_stat_read(&disk->part0, sectors[1]) -
2125 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002126
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002127 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2128 unsigned long rs_left;
2129 int i;
2130
2131 mdev->rs_last_events = curr_events;
2132
2133 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2134 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002135 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2136
2137 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2138 rs_left = mdev->ov_left;
2139 else
2140 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002141
2142 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2143 if (!dt)
2144 dt++;
2145 db = mdev->rs_mark_left[i] - rs_left;
2146 dbdt = Bit2KB(db/dt);
2147
Lars Ellenbergf3990022011-03-23 14:31:09 +01002148 if (dbdt > mdev->ldev->dc.c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002149 throttle = 1;
2150 }
2151 return throttle;
2152}
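/*
 * Worked numeric example for the rate check above (figures invented, and
 * assuming the roughly 3 second DRBD_SYNC_MARK_STEP, so two marks span
 * about dt = 6 seconds): if db = rs_mark_left[i] - rs_left = 9000 bitmap
 * bits were cleared in that window, each bit standing for 4 KiB, then
 * dbdt = Bit2KB(9000 / 6) = 1500 * 4 = 6000 KiB/s.  With c_min_rate set
 * to 4000 KiB/s this reports the device as busy and the caller throttles
 * the resync; with c_min_rate == 0 the check is disabled entirely.
 */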
2153
2154
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002155static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002157 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002158 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002159 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002160 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002161 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002162 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002163 unsigned int fault_type;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002164 struct p_block_req *p = tconn->data.rbuf;
2165
2166 mdev = vnr_to_mdev(tconn, pi->vnr);
2167 if (!mdev)
2168 return -EIO;
2169 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002170
2171 sector = be64_to_cpu(p->sector);
2172 size = be32_to_cpu(p->blksize);
2173
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002174 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002175 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2176 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002177 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178 }
2179 if (sector + (size>>9) > capacity) {
2180 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2181 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002182 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183 }
2184
2185 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002186 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002187 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002188 case P_DATA_REQUEST:
2189 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2190 break;
2191 case P_RS_DATA_REQUEST:
2192 case P_CSUM_RS_REQUEST:
2193 case P_OV_REQUEST:
2194 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2195 break;
2196 case P_OV_REPLY:
2197 verb = 0;
2198 dec_rs_pending(mdev);
2199 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2200 break;
2201 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002202 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002203 }
2204 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205 dev_err(DEV, "Can not satisfy peer's read request, "
2206 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002207
Lars Ellenberga821cc42010-09-06 12:31:37 +02002208 /* drain the payload, if any */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002209 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210 }
2211
2212 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2213 * "criss-cross" setup, that might cause write-out on some other DRBD,
2214 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002215 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2216 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002217 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002218 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219 }
2220
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002221 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002222 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002223 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002224 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002225 /* application IO, don't drbd_rs_begin_io */
2226 goto submit;
2227
Philipp Reisnerb411b362009-09-25 16:07:19 -07002228 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002229 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002231 /* used in the sector offset progress display */
2232 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002233 break;
2234
2235 case P_OV_REPLY:
2236 case P_CSUM_RS_REQUEST:
2237 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002238 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002239 if (!di)
2240 goto out_free_e;
2241
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002242 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002243 di->digest = (((char *)di)+sizeof(struct digest_info));
2244
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002245 peer_req->digest = di;
2246 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002247
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002248 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002249 goto out_free_e;
2250
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002251 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002252 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002253 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002254 /* used in the sector offset progress display */
2255 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002256 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002257 /* track progress, we may need to throttle */
2258 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002259 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002260 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002261 /* drbd_rs_begin_io done when we sent this request,
2262 * but accounting still needs to be done. */
2263 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002264 }
2265 break;
2266
2267 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002269 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002270 unsigned long now = jiffies;
2271 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 mdev->ov_start_sector = sector;
2273 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002274 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2275 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002276 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2277 mdev->rs_mark_left[i] = mdev->ov_left;
2278 mdev->rs_mark_time[i] = now;
2279 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002280 dev_info(DEV, "Online Verify start sector: %llu\n",
2281 (unsigned long long)sector);
2282 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002283 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002284 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 break;
2286
Philipp Reisnerb411b362009-09-25 16:07:19 -07002287 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002288 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002289 }
2290
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002291 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2292 * wrt the receiver, but it is not as straightforward as it may seem.
2293 * Various places in the resync start and stop logic assume resync
2294 * requests are processed in order, requeuing this on the worker thread
2295 * introduces a bunch of new code for synchronization between threads.
2296 *
2297 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2298 * "forever", throttling after drbd_rs_begin_io will lock that extent
2299 * for application writes for the same time. For now, just throttle
2300 * here, where the rest of the code expects the receiver to sleep for
2301 * a while, anyways.
2302 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002303
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002304 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2305 * this defers syncer requests for some time, before letting at least
2306 * one request through. The resync controller on the receiving side
2307 * will adapt to the incoming rate accordingly.
2308 *
2309 * We cannot throttle here if remote is Primary/SyncTarget:
2310 * we would also throttle its application reads.
2311 * In that case, throttling is done on the SyncTarget only.
2312 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002313 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2314 schedule_timeout_uninterruptible(HZ/10);
2315 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002316 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002318submit_for_resync:
2319 atomic_add(size >> 9, &mdev->rs_sect_ev);
2320
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002321submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002322 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002323 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002324 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002325 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002327 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002328 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002330 /* don't care for the reason here */
2331 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002332 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002333 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002334 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002335 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2336
Philipp Reisnerb411b362009-09-25 16:07:19 -07002337out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002338 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002339 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002340 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002341}
2342
2343static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2344{
2345 int self, peer, rv = -100;
2346 unsigned long ch_self, ch_peer;
2347
2348 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2349 peer = mdev->p_uuid[UI_BITMAP] & 1;
2350
2351 ch_peer = mdev->p_uuid[UI_SIZE];
2352 ch_self = mdev->comm_bm_set;
2353
Philipp Reisner89e58e72011-01-19 13:12:45 +01002354 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355 case ASB_CONSENSUS:
2356 case ASB_DISCARD_SECONDARY:
2357 case ASB_CALL_HELPER:
2358 dev_err(DEV, "Configuration error.\n");
2359 break;
2360 case ASB_DISCONNECT:
2361 break;
2362 case ASB_DISCARD_YOUNGER_PRI:
2363 if (self == 0 && peer == 1) {
2364 rv = -1;
2365 break;
2366 }
2367 if (self == 1 && peer == 0) {
2368 rv = 1;
2369 break;
2370 }
2371 /* Else fall through to one of the other strategies... */
2372 case ASB_DISCARD_OLDER_PRI:
2373 if (self == 0 && peer == 1) {
2374 rv = 1;
2375 break;
2376 }
2377 if (self == 1 && peer == 0) {
2378 rv = -1;
2379 break;
2380 }
2381 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002382 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002383 "Using discard-least-changes instead\n");
2384 case ASB_DISCARD_ZERO_CHG:
2385 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002386 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387 ? -1 : 1;
2388 break;
2389 } else {
2390 if (ch_peer == 0) { rv = 1; break; }
2391 if (ch_self == 0) { rv = -1; break; }
2392 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002393 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 break;
2395 case ASB_DISCARD_LEAST_CHG:
2396 if (ch_self < ch_peer)
2397 rv = -1;
2398 else if (ch_self > ch_peer)
2399 rv = 1;
2400 else /* ( ch_self == ch_peer ) */
2401 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002402 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002403 ? -1 : 1;
2404 break;
2405 case ASB_DISCARD_LOCAL:
2406 rv = -1;
2407 break;
2408 case ASB_DISCARD_REMOTE:
2409 rv = 1;
2410 }
2411
2412 return rv;
2413}
2414
2415static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2416{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002417 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002418
Philipp Reisner89e58e72011-01-19 13:12:45 +01002419 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002420 case ASB_DISCARD_YOUNGER_PRI:
2421 case ASB_DISCARD_OLDER_PRI:
2422 case ASB_DISCARD_LEAST_CHG:
2423 case ASB_DISCARD_LOCAL:
2424 case ASB_DISCARD_REMOTE:
2425 dev_err(DEV, "Configuration error.\n");
2426 break;
2427 case ASB_DISCONNECT:
2428 break;
2429 case ASB_CONSENSUS:
2430 hg = drbd_asb_recover_0p(mdev);
2431 if (hg == -1 && mdev->state.role == R_SECONDARY)
2432 rv = hg;
2433 if (hg == 1 && mdev->state.role == R_PRIMARY)
2434 rv = hg;
2435 break;
2436 case ASB_VIOLENTLY:
2437 rv = drbd_asb_recover_0p(mdev);
2438 break;
2439 case ASB_DISCARD_SECONDARY:
2440 return mdev->state.role == R_PRIMARY ? 1 : -1;
2441 case ASB_CALL_HELPER:
2442 hg = drbd_asb_recover_0p(mdev);
2443 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002444 enum drbd_state_rv rv2;
2445
2446 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002447 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2448 * we might be here in C_WF_REPORT_PARAMS which is transient.
2449 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002450 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2451 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002452 drbd_khelper(mdev, "pri-lost-after-sb");
2453 } else {
2454 dev_warn(DEV, "Successfully gave up primary role.\n");
2455 rv = hg;
2456 }
2457 } else
2458 rv = hg;
2459 }
2460
2461 return rv;
2462}
2463
2464static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2465{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002466 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467
Philipp Reisner89e58e72011-01-19 13:12:45 +01002468 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002469 case ASB_DISCARD_YOUNGER_PRI:
2470 case ASB_DISCARD_OLDER_PRI:
2471 case ASB_DISCARD_LEAST_CHG:
2472 case ASB_DISCARD_LOCAL:
2473 case ASB_DISCARD_REMOTE:
2474 case ASB_CONSENSUS:
2475 case ASB_DISCARD_SECONDARY:
2476 dev_err(DEV, "Configuration error.\n");
2477 break;
2478 case ASB_VIOLENTLY:
2479 rv = drbd_asb_recover_0p(mdev);
2480 break;
2481 case ASB_DISCONNECT:
2482 break;
2483 case ASB_CALL_HELPER:
2484 hg = drbd_asb_recover_0p(mdev);
2485 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002486 enum drbd_state_rv rv2;
2487
Philipp Reisnerb411b362009-09-25 16:07:19 -07002488 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2489 * we might be here in C_WF_REPORT_PARAMS which is transient.
2490 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002491 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2492 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002493 drbd_khelper(mdev, "pri-lost-after-sb");
2494 } else {
2495 dev_warn(DEV, "Successfully gave up primary role.\n");
2496 rv = hg;
2497 }
2498 } else
2499 rv = hg;
2500 }
2501
2502 return rv;
2503}
2504
2505static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2506 u64 bits, u64 flags)
2507{
2508 if (!uuid) {
2509 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2510 return;
2511 }
2512 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2513 text,
2514 (unsigned long long)uuid[UI_CURRENT],
2515 (unsigned long long)uuid[UI_BITMAP],
2516 (unsigned long long)uuid[UI_HISTORY_START],
2517 (unsigned long long)uuid[UI_HISTORY_END],
2518 (unsigned long long)bits,
2519 (unsigned long long)flags);
2520}
2521
2522/*
2523 100 after split brain try auto recover
2524 2 C_SYNC_SOURCE set BitMap
2525 1 C_SYNC_SOURCE use BitMap
2526 0 no Sync
2527 -1 C_SYNC_TARGET use BitMap
2528 -2 C_SYNC_TARGET set BitMap
2529 -100 after split brain, disconnect
2530-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002531-1091 requires proto 91
2532-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002533 */
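/*
 * Worked example of the rules below: when both nodes still carry the same
 * current UUID (rule 40) and neither side was Primary at crash time
 * (rct == 0), the result is 0 and no resync is started.
 */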
2534static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2535{
2536 u64 self, peer;
2537 int i, j;
2538
2539 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2540 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2541
2542 *rule_nr = 10;
2543 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2544 return 0;
2545
2546 *rule_nr = 20;
2547 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2548 peer != UUID_JUST_CREATED)
2549 return -2;
2550
2551 *rule_nr = 30;
2552 if (self != UUID_JUST_CREATED &&
2553 (peer == UUID_JUST_CREATED || peer == (u64)0))
2554 return 2;
2555
2556 if (self == peer) {
2557 int rct, dc; /* roles at crash time */
2558
2559 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2560
Philipp Reisner31890f42011-01-19 14:12:51 +01002561 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002562 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563
2564 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2565 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2566 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2567 drbd_uuid_set_bm(mdev, 0UL);
2568
2569 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2570 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2571 *rule_nr = 34;
2572 } else {
2573 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2574 *rule_nr = 36;
2575 }
2576
2577 return 1;
2578 }
2579
2580 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2581
Philipp Reisner31890f42011-01-19 14:12:51 +01002582 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002583 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002584
2585 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2586 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2587 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2588
2589 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2590 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2591 mdev->p_uuid[UI_BITMAP] = 0UL;
2592
2593 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2594 *rule_nr = 35;
2595 } else {
2596 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2597 *rule_nr = 37;
2598 }
2599
2600 return -1;
2601 }
2602
2603 /* Common power [off|failure] */
2604 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2605 (mdev->p_uuid[UI_FLAGS] & 2);
2606 /* lowest bit is set when we were primary,
2607 * next bit (weight 2) is set when peer was primary */
2608 *rule_nr = 40;
2609
2610 switch (rct) {
2611 case 0: /* !self_pri && !peer_pri */ return 0;
2612 case 1: /* self_pri && !peer_pri */ return 1;
2613 case 2: /* !self_pri && peer_pri */ return -1;
2614 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002615 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002616 return dc ? -1 : 1;
2617 }
2618 }
2619
2620 *rule_nr = 50;
2621 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2622 if (self == peer)
2623 return -1;
2624
2625 *rule_nr = 51;
2626 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2627 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002628 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002629 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2630 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2631 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002632		/* The last P_SYNC_UUID did not get through. Undo the peer's UUID
 2633		   modifications from the last start of resync as sync source. */
2634
Philipp Reisner31890f42011-01-19 14:12:51 +01002635 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002636 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002637
2638 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2639 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002640
 2641			dev_info(DEV, "Did not get the last syncUUID packet, corrected:\n");
2642 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2643
Philipp Reisnerb411b362009-09-25 16:07:19 -07002644 return -1;
2645 }
2646 }
2647
2648 *rule_nr = 60;
2649 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2650 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2651 peer = mdev->p_uuid[i] & ~((u64)1);
2652 if (self == peer)
2653 return -2;
2654 }
2655
2656 *rule_nr = 70;
2657 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2658 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2659 if (self == peer)
2660 return 1;
2661
2662 *rule_nr = 71;
2663 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2664 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002665 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002666 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2667 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2668 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002669		/* The last P_SYNC_UUID did not get through. Undo our own UUID
 2670		   modifications from the last start of resync as sync source. */
2671
Philipp Reisner31890f42011-01-19 14:12:51 +01002672 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002673 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674
2675 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2676 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2677
Philipp Reisner4a23f262011-01-11 17:42:17 +01002678 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002679 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2680 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2681
2682 return 1;
2683 }
2684 }
2685
2686
2687 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002688 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002689 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2690 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2691 if (self == peer)
2692 return 2;
2693 }
2694
2695 *rule_nr = 90;
2696 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2697 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2698 if (self == peer && self != ((u64)0))
2699 return 100;
2700
2701 *rule_nr = 100;
2702 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2703 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2704 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2705 peer = mdev->p_uuid[j] & ~((u64)1);
2706 if (self == peer)
2707 return -100;
2708 }
2709 }
2710
2711 return -1000;
2712}
2713
2714/* drbd_sync_handshake() returns the new conn state on success, or
2715 CONN_MASK (-1) on failure.
2716 */
2717static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2718 enum drbd_disk_state peer_disk) __must_hold(local)
2719{
2720 int hg, rule_nr;
2721 enum drbd_conns rv = C_MASK;
2722 enum drbd_disk_state mydisk;
2723
2724 mydisk = mdev->state.disk;
2725 if (mydisk == D_NEGOTIATING)
2726 mydisk = mdev->new_state_tmp.disk;
2727
2728 dev_info(DEV, "drbd_sync_handshake:\n");
2729 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2730 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2731 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2732
2733 hg = drbd_uuid_compare(mdev, &rule_nr);
2734
2735 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2736
2737 if (hg == -1000) {
2738 dev_alert(DEV, "Unrelated data, aborting!\n");
2739 return C_MASK;
2740 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002741 if (hg < -1000) {
2742 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 return C_MASK;
2744 }
2745
2746 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2747 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
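		/* Exactly one side is Inconsistent: that side becomes sync target,
		 * the other sync source; doubling hg to +/-2 below requests a
		 * full sync by setting the whole bitmap. */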
2748 int f = (hg == -100) || abs(hg) == 2;
2749 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2750 if (f)
2751 hg = hg*2;
2752 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2753 hg > 0 ? "source" : "target");
2754 }
2755
Adam Gandelman3a11a482010-04-08 16:48:23 -07002756 if (abs(hg) == 100)
2757 drbd_khelper(mdev, "initial-split-brain");
2758
Philipp Reisner89e58e72011-01-19 13:12:45 +01002759 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760 int pcount = (mdev->state.role == R_PRIMARY)
2761 + (peer_role == R_PRIMARY);
2762 int forced = (hg == -100);
2763
2764 switch (pcount) {
2765 case 0:
2766 hg = drbd_asb_recover_0p(mdev);
2767 break;
2768 case 1:
2769 hg = drbd_asb_recover_1p(mdev);
2770 break;
2771 case 2:
2772 hg = drbd_asb_recover_2p(mdev);
2773 break;
2774 }
2775 if (abs(hg) < 100) {
2776 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2777 "automatically solved. Sync from %s node\n",
2778 pcount, (hg < 0) ? "peer" : "this");
2779 if (forced) {
2780 dev_warn(DEV, "Doing a full sync, since"
 2781				 " UUIDs were ambiguous.\n");
2782 hg = hg*2;
2783 }
2784 }
2785 }
2786
2787 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002788 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002790 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791 hg = 1;
2792
2793 if (abs(hg) < 100)
2794 dev_warn(DEV, "Split-Brain detected, manually solved. "
2795 "Sync from %s node\n",
2796 (hg < 0) ? "peer" : "this");
2797 }
2798
2799 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002800 /* FIXME this log message is not correct if we end up here
2801 * after an attempted attach on a diskless node.
2802 * We just refuse to attach -- well, we drop the "connection"
2803 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002804 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805 drbd_khelper(mdev, "split-brain");
2806 return C_MASK;
2807 }
2808
2809 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2810 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2811 return C_MASK;
2812 }
2813
2814 if (hg < 0 && /* by intention we do not use mydisk here. */
2815 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002816 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 case ASB_CALL_HELPER:
2818 drbd_khelper(mdev, "pri-lost");
2819 /* fall through */
2820 case ASB_DISCONNECT:
2821 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2822 return C_MASK;
2823 case ASB_VIOLENTLY:
2824 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
 2825			     " assumption\n");
2826 }
2827 }
2828
Philipp Reisner8169e412011-03-15 18:40:27 +01002829 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002830 if (hg == 0)
2831 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2832 else
 2833			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.\n",
2834 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2835 abs(hg) >= 2 ? "full" : "bit-map based");
2836 return C_MASK;
2837 }
2838
Philipp Reisnerb411b362009-09-25 16:07:19 -07002839 if (abs(hg) >= 2) {
2840 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002841 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2842 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843 return C_MASK;
2844 }
2845
2846 if (hg > 0) { /* become sync source. */
2847 rv = C_WF_BITMAP_S;
2848 } else if (hg < 0) { /* become sync target */
2849 rv = C_WF_BITMAP_T;
2850 } else {
2851 rv = C_CONNECTED;
2852 if (drbd_bm_total_weight(mdev)) {
2853 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2854 drbd_bm_total_weight(mdev));
2855 }
2856 }
2857
2858 return rv;
2859}
2860
2861/* returns 1 if invalid */
2862static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2863{
2864 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2865 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2866 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2867 return 0;
2868
2869 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2870 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2871 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2872 return 1;
2873
2874 /* everything else is valid if they are equal on both sides. */
2875 if (peer == self)
2876 return 0;
2877
 2878	/* everything else is invalid. */
2879 return 1;
2880}
2881
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002882static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002883{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01002884 struct p_protocol *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002885 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002886 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2888
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 p_proto = be32_to_cpu(p->protocol);
2890 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2891 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2892 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002893 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002894 cf = be32_to_cpu(p->conn_flags);
2895 p_want_lose = cf & CF_WANT_LOSE;
2896
Philipp Reisner72046242011-03-15 18:51:47 +01002897 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002898
2899 if (cf & CF_DRY_RUN)
Philipp Reisner72046242011-03-15 18:51:47 +01002900 set_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002901
Philipp Reisner72046242011-03-15 18:51:47 +01002902 if (p_proto != tconn->net_conf->wire_protocol) {
2903 conn_err(tconn, "incompatible communication protocols\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904 goto disconnect;
2905 }
2906
Philipp Reisner72046242011-03-15 18:51:47 +01002907 if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
2908 conn_err(tconn, "incompatible after-sb-0pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909 goto disconnect;
2910 }
2911
Philipp Reisner72046242011-03-15 18:51:47 +01002912 if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
2913 conn_err(tconn, "incompatible after-sb-1pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914 goto disconnect;
2915 }
2916
Philipp Reisner72046242011-03-15 18:51:47 +01002917 if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
2918 conn_err(tconn, "incompatible after-sb-2pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 goto disconnect;
2920 }
2921
Philipp Reisner72046242011-03-15 18:51:47 +01002922 if (p_want_lose && tconn->net_conf->want_lose) {
2923 conn_err(tconn, "both sides have the 'want_lose' flag set\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002924 goto disconnect;
2925 }
2926
Philipp Reisner72046242011-03-15 18:51:47 +01002927 if (p_two_primaries != tconn->net_conf->two_primaries) {
2928 conn_err(tconn, "incompatible setting of the two-primaries options\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929 goto disconnect;
2930 }
2931
Philipp Reisner72046242011-03-15 18:51:47 +01002932 if (tconn->agreed_pro_version >= 87) {
2933 unsigned char *my_alg = tconn->net_conf->integrity_alg;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002934 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002935
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002936 err = drbd_recv_all(tconn, p_integrity_alg, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002937 if (err)
2938 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002939
2940 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2941 if (strcmp(p_integrity_alg, my_alg)) {
Philipp Reisner72046242011-03-15 18:51:47 +01002942 conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002943 goto disconnect;
2944 }
Philipp Reisner72046242011-03-15 18:51:47 +01002945 conn_info(tconn, "data-integrity-alg: %s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002946 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2947 }
2948
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002949 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002950
2951disconnect:
Philipp Reisner72046242011-03-15 18:51:47 +01002952 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002953 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002954}
2955
2956/* helper function
2957 * input: alg name, feature name
2958 * return: NULL (alg name was "")
2959 * ERR_PTR(error) if something goes wrong
2960 * or the crypto hash ptr, if it worked out ok. */
2961struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2962 const char *alg, const char *name)
2963{
2964 struct crypto_hash *tfm;
2965
2966 if (!alg[0])
2967 return NULL;
2968
2969 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2970 if (IS_ERR(tfm)) {
2971 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2972 alg, name, PTR_ERR(tfm));
2973 return tfm;
2974 }
2975 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2976 crypto_free_hash(tfm);
2977 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2978 return ERR_PTR(-EINVAL);
2979 }
2980 return tfm;
2981}
2982
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002983static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002985 void *buffer = tconn->data.rbuf;
2986 int size = pi->size;
2987
2988 while (size) {
2989 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
2990 s = drbd_recv(tconn, buffer, s);
2991 if (s <= 0) {
2992 if (s < 0)
2993 return s;
2994 break;
2995 }
2996 size -= s;
2997 }
2998 if (size)
2999 return -EIO;
3000 return 0;
3001}
3002
3003/*
3004 * config_unknown_volume - device configuration command for unknown volume
3005 *
3006 * When a device is added to an existing connection, the node on which the
3007 * device is added first will send configuration commands to its peer but the
3008 * peer will not know about the device yet. It will warn and ignore these
3009 * commands. Once the device is added on the second node, the second node will
3010 * send the same device configuration commands, but in the other direction.
3011 *
3012 * (We can also end up here if drbd is misconfigured.)
3013 */
3014static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3015{
3016 conn_warn(tconn, "Volume %u unknown; ignoring %s packet\n",
3017 pi->vnr, cmdname(pi->cmd));
3018 return ignore_remaining_packet(tconn, pi);
3019}
3020
3021static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3022{
3023 struct drbd_conf *mdev;
3024 struct p_rs_param_95 *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 unsigned int header_size, data_size, exp_max_sz;
3026 struct crypto_hash *verify_tfm = NULL;
3027 struct crypto_hash *csums_tfm = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003028 const int apv = tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02003029 int *rs_plan_s = NULL;
3030 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003031 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003033 mdev = vnr_to_mdev(tconn, pi->vnr);
3034 if (!mdev)
3035 return config_unknown_volume(tconn, pi);
3036
Philipp Reisnerb411b362009-09-25 16:07:19 -07003037 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3038 : apv == 88 ? sizeof(struct p_rs_param)
3039 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003040 : apv <= 94 ? sizeof(struct p_rs_param_89)
3041 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003042
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003043 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003045 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003046 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003047 }
3048
3049 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003050 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003051 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003052 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003053 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003054 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003056 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003057 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003058 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 D_ASSERT(data_size == 0);
3060 }
3061
3062 /* initialize verify_alg and csums_alg */
3063 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3064
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003065 err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size);
3066 if (err)
3067 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068
Lars Ellenbergf3990022011-03-23 14:31:09 +01003069 if (get_ldev(mdev)) {
3070 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3071 put_ldev(mdev);
3072 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003073
3074 if (apv >= 88) {
3075 if (apv == 88) {
3076 if (data_size > SHARED_SECRET_MAX) {
3077 dev_err(DEV, "verify-alg too long, "
3078 "peer wants %u, accepting only %u byte\n",
3079 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003080 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 }
3082
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003083 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
3084 if (err)
3085 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086
3087 /* we expect NUL terminated string */
3088 /* but just in case someone tries to be evil */
3089 D_ASSERT(p->verify_alg[data_size-1] == 0);
3090 p->verify_alg[data_size-1] = 0;
3091
3092 } else /* apv >= 89 */ {
3093 /* we still expect NUL terminated strings */
3094 /* but just in case someone tries to be evil */
3095 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3096 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3097 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3098 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3099 }
3100
Lars Ellenbergf3990022011-03-23 14:31:09 +01003101 if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003102 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3103 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003104 mdev->tconn->net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105 goto disconnect;
3106 }
3107 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3108 p->verify_alg, "verify-alg");
3109 if (IS_ERR(verify_tfm)) {
3110 verify_tfm = NULL;
3111 goto disconnect;
3112 }
3113 }
3114
Lars Ellenbergf3990022011-03-23 14:31:09 +01003115 if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3117 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003118 mdev->tconn->net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 goto disconnect;
3120 }
3121 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3122 p->csums_alg, "csums-alg");
3123 if (IS_ERR(csums_tfm)) {
3124 csums_tfm = NULL;
3125 goto disconnect;
3126 }
3127 }
3128
Lars Ellenbergf3990022011-03-23 14:31:09 +01003129 if (apv > 94 && get_ldev(mdev)) {
3130 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3131 mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3132 mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
3133 mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
3134 mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003135
Lars Ellenbergf3990022011-03-23 14:31:09 +01003136 fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +02003137 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3138 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3139 if (!rs_plan_s) {
 3140				dev_err(DEV, "kzalloc of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003141 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003142 goto disconnect;
3143 }
3144 }
Lars Ellenbergf3990022011-03-23 14:31:09 +01003145 put_ldev(mdev);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003146 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147
3148 spin_lock(&mdev->peer_seq_lock);
3149 /* lock against drbd_nl_syncer_conf() */
3150 if (verify_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003151 strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
3152 mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3153 crypto_free_hash(mdev->tconn->verify_tfm);
3154 mdev->tconn->verify_tfm = verify_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003155 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3156 }
3157 if (csums_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003158 strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
3159 mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3160 crypto_free_hash(mdev->tconn->csums_tfm);
3161 mdev->tconn->csums_tfm = csums_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003162 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3163 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003164 if (fifo_size != mdev->rs_plan_s.size) {
3165 kfree(mdev->rs_plan_s.values);
3166 mdev->rs_plan_s.values = rs_plan_s;
3167 mdev->rs_plan_s.size = fifo_size;
3168 mdev->rs_planed = 0;
3169 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003170 spin_unlock(&mdev->peer_seq_lock);
3171 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003172 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173
Philipp Reisnerb411b362009-09-25 16:07:19 -07003174disconnect:
3175 /* just for completeness: actually not needed,
3176 * as this is not reached if csums_tfm was ok. */
3177 crypto_free_hash(csums_tfm);
3178 /* but free the verify_tfm again, if csums_tfm did not work out */
3179 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003180 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003181 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003182}
3183
Philipp Reisnerb411b362009-09-25 16:07:19 -07003184/* warn if the arguments differ by more than 12.5% */
3185static void warn_if_differ_considerably(struct drbd_conf *mdev,
3186 const char *s, sector_t a, sector_t b)
3187{
3188 sector_t d;
3189 if (a == 0 || b == 0)
3190 return;
3191 d = (a > b) ? (a - b) : (b - a);
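	/* a>>3 and b>>3 are a/8 and b/8, so the warning fires once the
	 * absolute difference exceeds 12.5% of either value */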
3192 if (d > (a>>3) || d > (b>>3))
3193 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3194 (unsigned long long)a, (unsigned long long)b);
3195}
3196
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003197static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003199 struct drbd_conf *mdev;
3200 struct p_sizes *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003201 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202 sector_t p_size, p_usize, my_usize;
3203 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003204 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003205
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003206 mdev = vnr_to_mdev(tconn, pi->vnr);
3207 if (!mdev)
3208 return config_unknown_volume(tconn, pi);
3209
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 p_size = be64_to_cpu(p->d_size);
3211 p_usize = be64_to_cpu(p->u_size);
3212
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213 /* just store the peer's disk size for now.
3214 * we still need to figure out whether we accept that. */
3215 mdev->p_size = p_size;
3216
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217 if (get_ldev(mdev)) {
3218 warn_if_differ_considerably(mdev, "lower level device sizes",
3219 p_size, drbd_get_max_capacity(mdev->ldev));
3220 warn_if_differ_considerably(mdev, "user requested size",
3221 p_usize, mdev->ldev->dc.disk_size);
3222
3223 /* if this is the first connect, or an otherwise expected
3224 * param exchange, choose the minimum */
3225 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3226 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3227 p_usize);
3228
3229 my_usize = mdev->ldev->dc.disk_size;
3230
3231 if (mdev->ldev->dc.disk_size != p_usize) {
3232 mdev->ldev->dc.disk_size = p_usize;
3233 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3234 (unsigned long)mdev->ldev->dc.disk_size);
3235 }
3236
3237 /* Never shrink a device with usable data during connect.
3238 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003239 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240 drbd_get_capacity(mdev->this_bdev) &&
3241 mdev->state.disk >= D_OUTDATED &&
3242 mdev->state.conn < C_CONNECTED) {
3243 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003244 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003245 mdev->ldev->dc.disk_size = my_usize;
3246 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003247 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248 }
3249 put_ldev(mdev);
3250 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251
Philipp Reisnere89b5912010-03-24 17:11:33 +01003252 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003254 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003255 put_ldev(mdev);
3256 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003257 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003258 drbd_md_sync(mdev);
3259 } else {
3260 /* I am diskless, need to accept the peer's size. */
3261 drbd_set_my_capacity(mdev, p_size);
3262 }
3263
Philipp Reisner99432fc2011-05-20 16:39:13 +02003264 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3265 drbd_reconsider_max_bio_size(mdev);
3266
Philipp Reisnerb411b362009-09-25 16:07:19 -07003267 if (get_ldev(mdev)) {
3268 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3269 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3270 ldsc = 1;
3271 }
3272
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 put_ldev(mdev);
3274 }
3275
3276 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3277 if (be64_to_cpu(p->c_size) !=
3278 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3279 /* we have different sizes, probably peer
3280 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003281 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003282 }
3283 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3284 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3285 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003286 mdev->state.disk >= D_INCONSISTENT) {
3287 if (ddsf & DDSF_NO_RESYNC)
3288 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3289 else
3290 resync_after_online_grow(mdev);
3291 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003292 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3293 }
3294 }
3295
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003296 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003297}
3298
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003299static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003300{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003301 struct drbd_conf *mdev;
3302 struct p_uuids *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003304 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003306 mdev = vnr_to_mdev(tconn, pi->vnr);
3307 if (!mdev)
3308 return config_unknown_volume(tconn, pi);
3309
Philipp Reisnerb411b362009-09-25 16:07:19 -07003310 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3311
3312 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3313 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3314
3315 kfree(mdev->p_uuid);
3316 mdev->p_uuid = p_uuid;
3317
3318 if (mdev->state.conn < C_CONNECTED &&
3319 mdev->state.disk < D_INCONSISTENT &&
3320 mdev->state.role == R_PRIMARY &&
3321 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3322 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3323 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003324 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003325 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326 }
3327
3328 if (get_ldev(mdev)) {
3329 int skip_initial_sync =
3330 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003331 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3333 (p_uuid[UI_FLAGS] & 8);
3334 if (skip_initial_sync) {
3335 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3336 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003337 "clear_n_write from receive_uuids",
3338 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003339 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3340 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3341 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3342 CS_VERBOSE, NULL);
3343 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003344 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345 }
3346 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003347 } else if (mdev->state.disk < D_INCONSISTENT &&
3348 mdev->state.role == R_PRIMARY) {
3349 /* I am a diskless primary, the peer just created a new current UUID
3350 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003351 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003352 }
3353
 3354	/* Before we test for the disk state, we should wait until a possibly
 3355	   ongoing cluster-wide state change is finished. That is important if
3356 we are primary and are detaching from our disk. We need to see the
3357 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003358 mutex_lock(mdev->state_mutex);
3359 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003360 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003361 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3362
3363 if (updated_uuids)
3364 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003365
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003366 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367}
3368
3369/**
3370 * convert_state() - Converts the peer's view of the cluster state to our point of view
3371 * @ps: The state as seen by the peer.
3372 */
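/*
 * Example: a peer-reported state of role=Primary, peer=Secondary,
 * disk=UpToDate, pdsk=Inconsistent reads, from our point of view, as
 * role=Secondary, peer=Primary, disk=Inconsistent, pdsk=UpToDate.
 */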
3373static union drbd_state convert_state(union drbd_state ps)
3374{
3375 union drbd_state ms;
3376
3377 static enum drbd_conns c_tab[] = {
3378 [C_CONNECTED] = C_CONNECTED,
3379
3380 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3381 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3382 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3383 [C_VERIFY_S] = C_VERIFY_T,
3384 [C_MASK] = C_MASK,
3385 };
3386
3387 ms.i = ps.i;
3388
3389 ms.conn = c_tab[ps.conn];
3390 ms.peer = ps.role;
3391 ms.role = ps.peer;
3392 ms.pdsk = ps.disk;
3393 ms.disk = ps.pdsk;
3394 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3395
3396 return ms;
3397}
3398
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003399static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003401 struct drbd_conf *mdev;
3402 struct p_req_state *p = tconn->data.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003404 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003406 mdev = vnr_to_mdev(tconn, pi->vnr);
3407 if (!mdev)
3408 return -EIO;
3409
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410 mask.i = be32_to_cpu(p->mask);
3411 val.i = be32_to_cpu(p->val);
3412
Philipp Reisner25703f82011-02-07 14:35:25 +01003413 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003414 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003416 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003417 }
3418
3419 mask = convert_state(mask);
3420 val = convert_state(val);
3421
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003422 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3423 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425 drbd_md_sync(mdev);
3426
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003427 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003428}
3429
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003430static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003431{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01003432 struct p_req_state *p = tconn->data.rbuf;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003433 union drbd_state mask, val;
3434 enum drbd_state_rv rv;
3435
3436 mask.i = be32_to_cpu(p->mask);
3437 val.i = be32_to_cpu(p->val);
3438
3439 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3440 mutex_is_locked(&tconn->cstate_mutex)) {
3441 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003442 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003443 }
3444
3445 mask = convert_state(mask);
3446 val = convert_state(val);
3447
Philipp Reisner778bcf22011-03-28 12:55:03 +02003448 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003449 conn_send_sr_reply(tconn, rv);
3450
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003451 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003452}
3453
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003454static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003456 struct drbd_conf *mdev;
3457 struct p_state *p = tconn->data.rbuf;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003458 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003459 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003460 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003461 int rv;
3462
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003463 mdev = vnr_to_mdev(tconn, pi->vnr);
3464 if (!mdev)
3465 return config_unknown_volume(tconn, pi);
3466
Philipp Reisnerb411b362009-09-25 16:07:19 -07003467 peer_state.i = be32_to_cpu(p->state);
3468
3469 real_peer_disk = peer_state.disk;
3470 if (peer_state.disk == D_NEGOTIATING) {
3471 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3472 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3473 }
3474
Philipp Reisner87eeee42011-01-19 14:16:30 +01003475 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003476 retry:
Philipp Reisner78bae592011-03-28 15:40:12 +02003477 os = ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003478 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003479
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003480 /* peer says his disk is uptodate, while we think it is inconsistent,
3481 * and this happens while we think we have a sync going on. */
3482 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3483 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3484 /* If we are (becoming) SyncSource, but peer is still in sync
3485 * preparation, ignore its uptodate-ness to avoid flapping, it
3486 * will change to inconsistent once the peer reaches active
3487 * syncing states.
3488 * It may have changed syncer-paused flags, however, so we
3489 * cannot ignore this completely. */
3490 if (peer_state.conn > C_CONNECTED &&
3491 peer_state.conn < C_SYNC_SOURCE)
3492 real_peer_disk = D_INCONSISTENT;
3493
3494 /* if peer_state changes to connected at the same time,
3495 * it explicitly notifies us that it finished resync.
3496 * Maybe we should finish it up, too? */
3497 else if (os.conn >= C_SYNC_SOURCE &&
3498 peer_state.conn == C_CONNECTED) {
3499 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3500 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003501 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003502 }
3503 }
3504
3505 /* peer says his disk is inconsistent, while we think it is uptodate,
3506 * and this happens while the peer still thinks we have a sync going on,
3507 * but we think we are already done with the sync.
3508 * We ignore this to avoid flapping pdsk.
3509 * This should not happen, if the peer is a recent version of drbd. */
3510 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3511 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3512 real_peer_disk = D_UP_TO_DATE;
3513
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003514 if (ns.conn == C_WF_REPORT_PARAMS)
3515 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516
Philipp Reisner67531712010-10-27 12:21:30 +02003517 if (peer_state.conn == C_AHEAD)
3518 ns.conn = C_BEHIND;
3519
Philipp Reisnerb411b362009-09-25 16:07:19 -07003520 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3521 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3522 int cr; /* consider resync */
3523
3524 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003525 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003526 /* if we had an established connection
3527 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003528 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003530 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003531 /* if we have both been inconsistent, and the peer has been
3532 * forced to be UpToDate with --overwrite-data */
3533 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3534 /* if we had been plain connected, and the admin requested to
3535 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003536 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003537 (peer_state.conn >= C_STARTING_SYNC_S &&
3538 peer_state.conn <= C_WF_BITMAP_T));
3539
3540 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003541 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542
3543 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003544 if (ns.conn == C_MASK) {
3545 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003547 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548 } else if (peer_state.disk == D_NEGOTIATING) {
3549 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3550 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003551 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003552 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003553 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003554 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003555 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003556 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003557 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003558 }
3559 }
3560 }
3561
Philipp Reisner87eeee42011-01-19 14:16:30 +01003562 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner78bae592011-03-28 15:40:12 +02003563 if (os.i != drbd_read_state(mdev).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003564 goto retry;
3565 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003566 ns.peer = peer_state.role;
3567 ns.pdsk = real_peer_disk;
3568 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003569 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003570 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003571 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Philipp Reisner2aebfab2011-03-28 16:48:11 +02003572 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003573 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003574 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003575		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003576 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003577		dev_err(DEV, "Aborting Connect, cannot thaw IO with a peer that is only Consistent\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003578 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003579 drbd_uuid_new_current(mdev);
3580 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003581 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003582 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003583 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003584 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02003585 ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003586 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587
3588 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003589 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003590 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003591 }
3592
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003593 if (os.conn > C_WF_REPORT_PARAMS) {
3594 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003595 peer_state.disk != D_NEGOTIATING ) {
3596 /* we want resync, peer has not yet decided to sync... */
3597 /* Nowadays only used when forcing a node into primary role and
3598 setting its disk to UpToDate with that */
3599 drbd_send_uuids(mdev);
3600 drbd_send_state(mdev);
3601 }
3602 }
3603
Philipp Reisner89e58e72011-01-19 13:12:45 +01003604 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003605
3606 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3607
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003608 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003609}
3610
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003611static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003612{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003613 struct drbd_conf *mdev;
3614 struct p_rs_uuid *p = tconn->data.rbuf;
3615
3616 mdev = vnr_to_mdev(tconn, pi->vnr);
3617 if (!mdev)
3618 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619
3620 wait_event(mdev->misc_wait,
3621 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003622 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623 mdev->state.conn < C_CONNECTED ||
3624 mdev->state.disk < D_NEGOTIATING);
3625
3626 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3627
Philipp Reisnerb411b362009-09-25 16:07:19 -07003628 /* Here the _drbd_uuid_ functions are right, current should
3629 _not_ be rotated into the history */
3630 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3631 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3632 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3633
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003634 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 drbd_start_resync(mdev, C_SYNC_TARGET);
3636
3637 put_ldev(mdev);
3638 } else
3639 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3640
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003641 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642}
3643
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003644/**
3645 * receive_bitmap_plain
3646 *
3647 * Return 0 when done, 1 when another iteration is needed, and a negative error
3648 * code upon failure.
3649 */
3650static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003651receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003652 struct p_header *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653{
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003654 unsigned long *buffer = (unsigned long *)h->payload;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003655 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3656 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003657 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003658
Philipp Reisner02918be2010-08-20 14:35:10 +02003659 if (want != data_size) {
3660 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003661 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003662 }
3663 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003664 return 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003665 err = drbd_recv_all(mdev->tconn, buffer, want);
3666 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003667 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668
3669 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3670
3671 c->word_offset += num_words;
3672 c->bit_offset = c->word_offset * BITS_PER_LONG;
3673 if (c->bit_offset > c->bm_bits)
3674 c->bit_offset = c->bm_bits;
3675
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003676 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003677}
3678
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003679static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
3680{
3681 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
3682}
3683
3684static int dcbp_get_start(struct p_compressed_bm *p)
3685{
3686 return (p->encoding & 0x80) != 0;
3687}
3688
3689static int dcbp_get_pad_bits(struct p_compressed_bm *p)
3690{
3691 return (p->encoding >> 4) & 0x7;
3692}
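
/*
 * Editor's illustration (not part of the driver, kept under #if 0): how the
 * "encoding" byte of struct p_compressed_bm is unpacked by the three helpers
 * above.  The example value below is made up purely for illustration.
 */
#if 0
static void dcbp_layout_example(struct p_compressed_bm *p)
{
	/* low nibble: bitmap code, bits 4..6: pad bits, bit 7: start value */
	p->encoding = 0x92;			/* binary 1001 0010 */
	WARN_ON(dcbp_get_code(p) != 2);		/* 0x92 & 0x0f */
	WARN_ON(dcbp_get_start(p) != 1);	/* bit 7 is set */
	WARN_ON(dcbp_get_pad_bits(p) != 1);	/* (0x92 >> 4) & 0x7 */
}
#endif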
3693
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003694/**
3695 * recv_bm_rle_bits
3696 *
3697 * Return 0 when done, 1 when another iteration is needed, and a negative error
3698 * code upon failure.
3699 */
3700static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003701recv_bm_rle_bits(struct drbd_conf *mdev,
3702 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003703 struct bm_xfer_ctx *c,
3704 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705{
3706 struct bitstream bs;
3707 u64 look_ahead;
3708 u64 rl;
3709 u64 tmp;
3710 unsigned long s = c->bit_offset;
3711 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003712 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003713 int have;
3714 int bits;
3715
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003716 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003717
3718 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3719 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003720 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003721
3722 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3723 bits = vli_decode_bits(&rl, look_ahead);
3724 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003725 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726
3727 if (toggle) {
3728 e = s + rl -1;
3729 if (e >= c->bm_bits) {
3730 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003731 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732 }
3733 _drbd_bm_set_bits(mdev, s, e);
3734 }
3735
3736 if (have < bits) {
3737 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3738 have, bits, look_ahead,
3739 (unsigned int)(bs.cur.b - p->code),
3740 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003741 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003742 }
3743 look_ahead >>= bits;
3744 have -= bits;
3745
3746 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3747 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003748 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003749 look_ahead |= tmp << have;
3750 have += bits;
3751 }
3752
3753 c->bit_offset = s;
3754 bm_xfer_ctx_bit_to_word_offset(c);
3755
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003756 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757}
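
/*
 * Editor's note, a worked example of the decode loop above (run lengths made
 * up for illustration): with dcbp_get_start(p) == 0 and decoded run lengths
 * 10, 4 and 6, the first 10 bits starting at c->bit_offset stay clear, the
 * next 4 bits are set via _drbd_bm_set_bits(), and the following 6 bits stay
 * clear again; the toggle flips after every run length.
 */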
3758
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003759/**
3760 * decode_bitmap_c
3761 *
3762 * Return 0 when done, 1 when another iteration is needed, and a negative error
3763 * code upon failure.
3764 */
3765static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003766decode_bitmap_c(struct drbd_conf *mdev,
3767 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003768 struct bm_xfer_ctx *c,
3769 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003771 if (dcbp_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003772 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
3774 /* other variants had been implemented for evaluation,
3775 * but have been dropped as this one turned out to be "best"
3776 * during all our tests. */
3777
3778 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003779 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003780 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003781}
3782
3783void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3784 const char *direction, struct bm_xfer_ctx *c)
3785{
3786 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003787 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003788 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3789 + c->bm_words * sizeof(long);
3790 unsigned total = c->bytes[0] + c->bytes[1];
3791 unsigned r;
3792
3793	/* total cannot be zero, but just in case: */
3794 if (total == 0)
3795 return;
3796
3797 /* don't report if not compressed */
3798 if (total >= plain)
3799 return;
3800
3801 /* total < plain. check for overflow, still */
3802 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3803 : (1000 * total / plain);
3804
3805 if (r > 1000)
3806 r = 1000;
3807
3808 r = 1000 - r;
3809 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3810 "total %u; compression: %u.%u%%\n",
3811 direction,
3812 c->bytes[1], c->packets[1],
3813 c->bytes[0], c->packets[0],
3814 total, r/10, r % 10);
3815}
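
/*
 * Editor's note, a worked example of the ratio reported above (made-up
 * sizes): with plain = 4096 bytes and total = 1024 bytes actually
 * transferred, r = 1000 * 1024 / 4096 = 250, then r = 1000 - r = 750,
 * and the log line reports "compression: 75.0%".
 */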
3816
3817/* Since we are processing the bitfield from lower addresses to higher,
3818   it does not matter whether we process it in 32 bit chunks or 64 bit
3819   chunks, as long as it is little endian. (Understand it as a byte stream,
3820   beginning with the lowest byte...) If we used big endian
3821   we would need to process it from the highest address to the lowest,
3822 in order to be agnostic to the 32 vs 64 bits issue.
3823
3824   Returns 0 on success, or a negative error code on failure. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003825static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003826{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003827 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003828 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003829 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003830 struct p_header *h = tconn->data.rbuf;
3831
3832 mdev = vnr_to_mdev(tconn, pi->vnr);
3833 if (!mdev)
3834 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003836 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3837 /* you are supposed to send additional out-of-sync information
3838 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839
Philipp Reisnerb411b362009-09-25 16:07:19 -07003840 c = (struct bm_xfer_ctx) {
3841 .bm_bits = drbd_bm_bits(mdev),
3842 .bm_words = drbd_bm_words(mdev),
3843 };
3844
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003845 for(;;) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003846 if (pi->cmd == P_BITMAP) {
3847 err = receive_bitmap_plain(mdev, pi->size, h, &c);
3848 } else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 /* MAYBE: sanity check that we speak proto >= 90,
3850 * and the feature is enabled! */
3851 struct p_compressed_bm *p;
3852
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003853 if (pi->size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003855 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003856 goto out;
3857 }
Andreas Gruenbacherfc568152011-03-24 21:23:50 +01003858
3859 p = mdev->tconn->data.rbuf;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003860 err = drbd_recv_all(mdev->tconn, p->head.payload, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003861 if (err)
3862 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003863 if (pi->size <= (sizeof(*p) - sizeof(p->head))) {
3864 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003865 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003866 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003867 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003868 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003870 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003871 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003872 goto out;
3873 }
3874
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003875 c.packets[pi->cmd == P_BITMAP]++;
3876 c.bytes[pi->cmd == P_BITMAP] += sizeof(struct p_header) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003878 if (err <= 0) {
3879 if (err < 0)
3880 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003882 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003883 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003884 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003885 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003886 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003887
3888 INFO_bm_xfer_stats(mdev, "receive", &c);
3889
3890 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003891 enum drbd_state_rv rv;
3892
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003893 err = drbd_send_bitmap(mdev);
3894 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003895 goto out;
3896 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003897 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3898 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3900 /* admin may have requested C_DISCONNECTING,
3901 * other threads may have noticed network errors */
3902 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3903 drbd_conn_str(mdev->state.conn));
3904 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003905 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906
Philipp Reisnerb411b362009-09-25 16:07:19 -07003907 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003908 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003909 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003911 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003912}
3913
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003914static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003916 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003917 pi->cmd, pi->size);
Philipp Reisner2de876e2011-03-15 14:38:01 +01003918
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003919 return ignore_remaining_packet(tconn, pi);
Philipp Reisner2de876e2011-03-15 14:38:01 +01003920}
3921
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003922static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 /* Make sure we've acked all the TCP data associated
3925 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003926 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003928 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003929}
3930
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003931static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003932{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003933 struct drbd_conf *mdev;
3934 struct p_block_desc *p = tconn->data.rbuf;
3935
3936 mdev = vnr_to_mdev(tconn, pi->vnr);
3937 if (!mdev)
3938 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003939
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003940 switch (mdev->state.conn) {
3941 case C_WF_SYNC_UUID:
3942 case C_WF_BITMAP_T:
3943 case C_BEHIND:
3944 break;
3945 default:
3946 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3947 drbd_conn_str(mdev->state.conn));
3948 }
3949
Philipp Reisner73a01a12010-10-27 14:33:00 +02003950 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3951
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003952 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003953}
3954
Philipp Reisner02918be2010-08-20 14:35:10 +02003955struct data_cmd {
3956 int expect_payload;
3957 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003958 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003959};
3960
Philipp Reisner02918be2010-08-20 14:35:10 +02003961static struct data_cmd drbd_cmd_handler[] = {
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003962 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3963 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3964 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3965 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
3966 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3967 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3968 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
3969 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3970 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3971 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3972 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
3973 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3974 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3975 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3976 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3977 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3978 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3979 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3980 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3981 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3982 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3983 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
3984 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner02918be2010-08-20 14:35:10 +02003985};
3986
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003987static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01003989 struct p_header *header = tconn->data.rbuf;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003990 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003991 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003992 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003994 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01003995 struct data_cmd *cmd;
3996
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003997 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01003998 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003999 goto err_out;
4000
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004001 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004002 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004003 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004004 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004005 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004006
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004007 shs = cmd->pkt_size - sizeof(struct p_header);
4008 if (pi.size - shs > 0 && !cmd->expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004009 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004010 goto err_out;
4011 }
4012
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004013 if (shs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004014 err = drbd_recv_all_warn(tconn, &header->payload, shs);
4015 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004016 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004017 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004018 }
4019
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004020 err = cmd->fn(tconn, &pi);
4021 if (err) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004022 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004023 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004024 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004026 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004027 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004029 err_out:
4030 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031}
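
/*
 * Editor's note, an example pass through the dispatch loop above, using
 * P_SIZES as an arbitrary example: drbd_recv_header() fills in pi, the
 * table entry yields pkt_size = sizeof(struct p_sizes) with no extra
 * payload allowed, shs = pkt_size - sizeof(struct p_header) bytes are
 * read into header->payload, pi.size drops by shs, and receive_sizes()
 * is finally called to act on the packet.
 */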
4032
Philipp Reisner0e29d162011-02-18 14:23:11 +01004033void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034{
4035 struct drbd_wq_barrier barr;
4036
4037 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004038 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004040 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041 wait_for_completion(&barr.done);
4042}
4043
Philipp Reisner360cc742011-02-08 14:29:53 +01004044static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004045{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004046 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004048
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004049 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051
4052 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004053 drbd_thread_stop(&tconn->asender);
4054 drbd_free_sock(tconn);
4055
4056 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +01004057 conn_info(tconn, "Connection closed\n");
4058
Philipp Reisnercb703452011-03-24 11:03:07 +01004059 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4060 conn_try_outdate_peer_async(tconn);
4061
Philipp Reisner360cc742011-02-08 14:29:53 +01004062 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004063 oc = tconn->cstate;
4064 if (oc >= C_UNCONNECTED)
4065 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4066
Philipp Reisner360cc742011-02-08 14:29:53 +01004067 spin_unlock_irq(&tconn->req_lock);
4068
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004069 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01004070 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
4071
4072 crypto_free_hash(tconn->cram_hmac_tfm);
4073 tconn->cram_hmac_tfm = NULL;
4074
4075 kfree(tconn->net_conf);
4076 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004077 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004078 }
4079}
4080
4081static int drbd_disconnected(int vnr, void *p, void *data)
4082{
4083 struct drbd_conf *mdev = (struct drbd_conf *)p;
4084 enum drbd_fencing_p fp;
4085 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086
Philipp Reisner85719572010-07-21 10:20:17 +02004087 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004088 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4090 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4091 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004092 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093
4094 /* We do not have data structures that would allow us to
4095 * get the rs_pending_cnt down to 0 again.
4096 * * On C_SYNC_TARGET we do not have any data structures describing
4097 * the pending RSDataRequest's we have sent.
4098 * * On C_SYNC_SOURCE there is no data structure that tracks
4099 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4100 * And no, it is not the sum of the reference counts in the
4101 * resync_LRU. The resync_LRU tracks the whole operation including
4102 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4103 * on the fly. */
4104 drbd_rs_cancel_all(mdev);
4105 mdev->rs_total = 0;
4106 mdev->rs_failed = 0;
4107 atomic_set(&mdev->rs_pending_cnt, 0);
4108 wake_up(&mdev->misc_wait);
4109
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004110 del_timer(&mdev->request_timer);
4111
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113 resync_timer_fn((unsigned long)mdev);
4114
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4116 * w_make_resync_request etc. which may still be on the worker queue
4117 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004118 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004119
4120 /* This also does reclaim_net_ee(). If we do this too early, we might
4121 * miss some resync ee and pages.*/
4122 drbd_process_done_ee(mdev);
4123
4124 kfree(mdev->p_uuid);
4125 mdev->p_uuid = NULL;
4126
Philipp Reisner2aebfab2011-03-28 16:48:11 +02004127 if (!drbd_suspended(mdev))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004128 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129
Philipp Reisnerb411b362009-09-25 16:07:19 -07004130 drbd_md_sync(mdev);
4131
4132 fp = FP_DONT_CARE;
4133 if (get_ldev(mdev)) {
4134 fp = mdev->ldev->dc.fencing;
4135 put_ldev(mdev);
4136 }
4137
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004138 /* serialize with bitmap writeout triggered by the state change,
4139 * if any. */
4140 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4141
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142 /* tcp_close and release of sendpage pages can be deferred. I don't
4143 * want to use SO_LINGER, because apparently it can be deferred for
4144 * more than 20 seconds (longest time I checked).
4145 *
4146	 * Actually we don't care exactly when the network stack does its
4147 * put_page(), but release our reference on these pages right here.
4148 */
4149 i = drbd_release_ee(mdev, &mdev->net_ee);
4150 if (i)
4151 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004152 i = atomic_read(&mdev->pp_in_use_by_net);
4153 if (i)
4154 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004155 i = atomic_read(&mdev->pp_in_use);
4156 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004157 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158
4159 D_ASSERT(list_empty(&mdev->read_ee));
4160 D_ASSERT(list_empty(&mdev->active_ee));
4161 D_ASSERT(list_empty(&mdev->sync_ee));
4162 D_ASSERT(list_empty(&mdev->done_ee));
4163
4164 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4165 atomic_set(&mdev->current_epoch->epoch_size, 0);
4166 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004167
4168 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004169}
4170
4171/*
4172 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4173 * we can agree on is stored in agreed_pro_version.
4174 *
4175 * feature flags and the reserved array should be enough room for future
4176 * enhancements of the handshake protocol, and possible plugins...
4177 *
4178 * For now they are expected to be zero, but are ignored either way.
4179 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004180static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004181{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004182	/* ASSERT current == tconn->receiver ... */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004183 struct p_connection_features *p = tconn->data.sbuf;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004184 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004186 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4187 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004188 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189 }
4190
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004191 if (tconn->data.socket == NULL) {
4192 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004193 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194 }
4195
4196 memset(p, 0, sizeof(*p));
4197 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4198 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004199 err = _conn_send_cmd(tconn, 0, &tconn->data, P_CONNECTION_FEATURES,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004200 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004201 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004202 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203}
4204
4205/*
4206 * return values:
4207 * 1 yes, we have a valid connection
4208 * 0 oops, did not work out, please try again
4209 * -1 peer talks different language,
4210 * no point in trying again, please go standalone.
4211 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004212static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004213{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004214 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004215 struct p_connection_features *p = tconn->data.rbuf;
4216 const int expect = sizeof(struct p_connection_features) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004217 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004218 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004219
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004220 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004221 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004222 return 0;
4223
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004224 err = drbd_recv_header(tconn, &pi);
4225 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004226 return 0;
4227
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004228 if (pi.cmd != P_CONNECTION_FEATURES) {
4229 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004230 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231 return -1;
4232 }
4233
Philipp Reisner77351055b2011-02-07 17:24:26 +01004234 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004235 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004236 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 return -1;
4238 }
4239
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004240 err = drbd_recv_all_warn(tconn, &p->head.payload, expect);
4241 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244 p->protocol_min = be32_to_cpu(p->protocol_min);
4245 p->protocol_max = be32_to_cpu(p->protocol_max);
4246 if (p->protocol_max == 0)
4247 p->protocol_max = p->protocol_min;
4248
4249 if (PRO_VERSION_MAX < p->protocol_min ||
4250 PRO_VERSION_MIN > p->protocol_max)
4251 goto incompat;
4252
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004253 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004255 conn_info(tconn, "Handshake successful: "
4256 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257
4258 return 1;
4259
4260 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004261 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262 "I support %d-%d, peer supports %d-%d\n",
4263 PRO_VERSION_MIN, PRO_VERSION_MAX,
4264 p->protocol_min, p->protocol_max);
4265 return -1;
4266}
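
/*
 * Editor's note, a worked example of the negotiation above (hypothetical
 * version numbers): if we support protocols 86..96 and the peer announces
 * 90..100, the ranges overlap and agreed_pro_version becomes
 * min(96, 100) = 96.  Had the peer announced 97..100 instead, the overlap
 * check above would fail and we would report incompatible dialects.
 */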
4267
4268#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004269static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270{
4271	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4272	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004273 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274}
4275#else
4276#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004277
4278/* Return value:
4279 1 - auth succeeded,
4280 0 - failed, try again (network error),
4281 -1 - auth failed, don't try again.
4282*/
4283
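/*
 * Editor's summary of the exchange implemented below (both sides run it
 * symmetrically): send P_AUTH_CHALLENGE with a 64 byte random challenge,
 * receive the peer's challenge, reply with P_AUTH_RESPONSE carrying
 * HMAC(shared_secret, peer's challenge), then receive the peer's response
 * and compare it against HMAC(shared_secret, our own challenge).
 */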
Philipp Reisner13e60372011-02-08 09:54:40 +01004284static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285{
4286 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4287 struct scatterlist sg;
4288 char *response = NULL;
4289 char *right_response = NULL;
4290 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004291 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292 unsigned int resp_size;
4293 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004294 struct packet_info pi;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004295 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296
Philipp Reisner13e60372011-02-08 09:54:40 +01004297 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004298 desc.flags = 0;
4299
Philipp Reisner13e60372011-02-08 09:54:40 +01004300 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4301 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004303 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004304 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305 goto fail;
4306 }
4307
4308 get_random_bytes(my_challenge, CHALLENGE_LEN);
4309
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004310 rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004311 if (!rv)
4312 goto fail;
4313
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004314 err = drbd_recv_header(tconn, &pi);
4315 if (err) {
4316 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004317 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004318 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319
Philipp Reisner77351055b2011-02-07 17:24:26 +01004320 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004321 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004322 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323 rv = 0;
4324 goto fail;
4325 }
4326
Philipp Reisner77351055b2011-02-07 17:24:26 +01004327 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004328		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004329 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330 goto fail;
4331 }
4332
Philipp Reisner77351055b2011-02-07 17:24:26 +01004333 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004334 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004335 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004336 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337 goto fail;
4338 }
4339
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004340 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4341 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342 rv = 0;
4343 goto fail;
4344 }
4345
Philipp Reisner13e60372011-02-08 09:54:40 +01004346 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 response = kmalloc(resp_size, GFP_NOIO);
4348 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004349 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004350 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 goto fail;
4352 }
4353
4354 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004355 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356
4357 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4358 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004359 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004360 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361 goto fail;
4362 }
4363
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004364 rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365 if (!rv)
4366 goto fail;
4367
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004368 err = drbd_recv_header(tconn, &pi);
4369 if (err) {
4370 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004372 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004373
Philipp Reisner77351055b2011-02-07 17:24:26 +01004374 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004375 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004376 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004377 rv = 0;
4378 goto fail;
4379 }
4380
Philipp Reisner77351055b2011-02-07 17:24:26 +01004381 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004382		conn_err(tconn, "AuthResponse payload of unexpected size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383 rv = 0;
4384 goto fail;
4385 }
4386
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004387	err = drbd_recv_all_warn(tconn, response, resp_size);
4388 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004389 rv = 0;
4390 goto fail;
4391 }
4392
4393 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004394 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004395 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004396 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397 goto fail;
4398 }
4399
4400 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4401
4402 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4403 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004404 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004405 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 goto fail;
4407 }
4408
4409 rv = !memcmp(response, right_response, resp_size);
4410
4411 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004412 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4413 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004414 else
4415 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004416
4417 fail:
4418 kfree(peers_ch);
4419 kfree(response);
4420 kfree(right_response);
4421
4422 return rv;
4423}
4424#endif
4425
4426int drbdd_init(struct drbd_thread *thi)
4427{
Philipp Reisner392c8802011-02-09 10:33:31 +01004428 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429 int h;
4430
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004431 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432
4433 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004434 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004436 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004437 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004438 }
4439 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004440 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004441 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442 }
4443 } while (h == 0);
4444
4445 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004446 if (get_net_conf(tconn)) {
4447 drbdd(tconn);
4448 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449 }
4450 }
4451
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004452 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004454 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455 return 0;
4456}
4457
4458/* ********* acknowledge sender ******** */
4459
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004460static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004461{
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01004462 struct p_req_state_reply *p = tconn->meta.rbuf;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004463 int retcode = be32_to_cpu(p->retcode);
4464
4465 if (retcode >= SS_SUCCESS) {
4466 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4467 } else {
4468 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4469 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4470 drbd_set_st_err_str(retcode), retcode);
4471 }
4472 wake_up(&tconn->ping_wait);
4473
4474 return true;
4475}
4476
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004477static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004479 struct drbd_conf *mdev;
4480 struct p_req_state_reply *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004481 int retcode = be32_to_cpu(p->retcode);
4482
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004483 mdev = vnr_to_mdev(tconn, pi->vnr);
4484 if (!mdev)
4485 return false;
4486
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004487 if (retcode >= SS_SUCCESS) {
4488 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4489 } else {
4490 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4491 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4492 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004494 wake_up(&mdev->state_wait);
4495
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004496 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004497}
4498
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004499static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004500{
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02004501 return !drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502
4503}
4504
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004505static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004506{
4507 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004508 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4509 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4510 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004512 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004513}
4514
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004515static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004517 struct drbd_conf *mdev;
4518 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519 sector_t sector = be64_to_cpu(p->sector);
4520 int blksize = be32_to_cpu(p->blksize);
4521
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004522 mdev = vnr_to_mdev(tconn, pi->vnr);
4523 if (!mdev)
4524 return false;
4525
Philipp Reisner31890f42011-01-19 14:12:51 +01004526 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527
4528 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4529
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004530 if (get_ldev(mdev)) {
4531 drbd_rs_complete_io(mdev, sector);
4532 drbd_set_in_sync(mdev, sector, blksize);
4533 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4534 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4535 put_ldev(mdev);
4536 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004538 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004540 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004541}
4542
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004543static int
4544validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4545 struct rb_root *root, const char *func,
4546 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547{
4548 struct drbd_request *req;
4549 struct bio_and_error m;
4550
Philipp Reisner87eeee42011-01-19 14:16:30 +01004551 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004552 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004553 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004554 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004555 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004556 }
4557 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004558 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004559
4560 if (m.bio)
4561 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004562 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563}
4564
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004565static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004566{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004567 struct drbd_conf *mdev;
4568 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569 sector_t sector = be64_to_cpu(p->sector);
4570 int blksize = be32_to_cpu(p->blksize);
4571 enum drbd_req_event what;
4572
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004573 mdev = vnr_to_mdev(tconn, pi->vnr);
4574 if (!mdev)
4575 return false;
4576
Philipp Reisnerb411b362009-09-25 16:07:19 -07004577 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4578
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004579 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580 drbd_set_in_sync(mdev, sector, blksize);
4581 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004582 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004584 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004586 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004587 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004588 break;
4589 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004590 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004591 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592 break;
4593 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004594 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004595 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004596 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004597 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004598 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004599 what = DISCARD_WRITE;
4600 break;
4601 case P_RETRY_WRITE:
4602 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4603 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604 break;
4605 default:
4606 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004607 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004608 }
4609
4610 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004611 &mdev->write_requests, __func__,
4612 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613}
4614
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004615static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004616{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004617 struct drbd_conf *mdev;
4618 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004619 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004620 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004621 bool missing_ok = tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4622 tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004623 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004625 mdev = vnr_to_mdev(tconn, pi->vnr);
4626 if (!mdev)
4627 return false;
4628
Philipp Reisnerb411b362009-09-25 16:07:19 -07004629 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4630
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004631 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004632 dec_rs_pending(mdev);
4633 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004634 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004636
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004637 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004638 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004639 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004640 if (!found) {
4641 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4642 The master bio might already be completed, therefore the
4643		   request is no longer in the write_requests tree. */
4644 /* In Protocol B we might already have got a P_RECV_ACK
4645 but then get a P_NEG_ACK afterwards. */
4646 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004647 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004648 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004649 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004650 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004651}
4652
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004653static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004655 struct drbd_conf *mdev;
4656 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657 sector_t sector = be64_to_cpu(p->sector);
4658
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004659 mdev = vnr_to_mdev(tconn, pi->vnr);
4660 if (!mdev)
4661 return false;
4662
Philipp Reisnerb411b362009-09-25 16:07:19 -07004663 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004664
Philipp Reisnerb411b362009-09-25 16:07:19 -07004665 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4666 (unsigned long long)sector, be32_to_cpu(p->blksize));
4667
4668 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004669 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004670 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004671}
4672
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004673static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004675 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676 sector_t sector;
4677 int size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004678 struct p_block_ack *p = tconn->meta.rbuf;
4679
4680 mdev = vnr_to_mdev(tconn, pi->vnr);
4681 if (!mdev)
4682 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004683
4684 sector = be64_to_cpu(p->sector);
4685 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686
4687 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4688
4689 dec_rs_pending(mdev);
4690
4691 if (get_ldev_if_state(mdev, D_FAILED)) {
4692 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004693 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004694 case P_NEG_RS_DREPLY:
4695 drbd_rs_failed_io(mdev, sector, size);
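			/* fall through */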
4696 case P_RS_CANCEL:
4697 break;
4698 default:
4699 D_ASSERT(0);
4700 put_ldev(mdev);
4701 return false;
4702 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004703 put_ldev(mdev);
4704 }
4705
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004706 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707}
4708
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004709static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004710{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004711 struct drbd_conf *mdev;
4712 struct p_barrier_ack *p = tconn->meta.rbuf;
4713
4714 mdev = vnr_to_mdev(tconn, pi->vnr);
4715 if (!mdev)
4716 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004718 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004720 if (mdev->state.conn == C_AHEAD &&
4721 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004722 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
4723 mdev->start_resync_timer.expires = jiffies + HZ;
4724 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004725 }
4726
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004727 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004728}
4729
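/* Result of a single online-verify request: record the block as out of
 * sync if the peer reported a mismatch, advance the progress marks, and
 * once the last reply is in, queue w_ov_finished to wrap up the verify. */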
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004730static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004732 struct drbd_conf *mdev;
4733 struct p_block_ack *p = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004734 struct drbd_work *w;
4735 sector_t sector;
4736 int size;
4737
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004738 mdev = vnr_to_mdev(tconn, pi->vnr);
4739 if (!mdev)
4740 return false;
4741
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742 sector = be64_to_cpu(p->sector);
4743 size = be32_to_cpu(p->blksize);
4744
4745 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4746
4747 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004748 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004749 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004750 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004751
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004752 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004753 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004754
Philipp Reisnerb411b362009-09-25 16:07:19 -07004755 drbd_rs_complete_io(mdev, sector);
4756 dec_rs_pending(mdev);
4757
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004758 --mdev->ov_left;
4759
4760 /* let's advance progress step marks only for every other megabyte */
4761 if ((mdev->ov_left & 0x200) == 0x200)
4762 drbd_advance_rs_marks(mdev, mdev->ov_left);
4763
4764 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004765 w = kmalloc(sizeof(*w), GFP_NOIO);
4766 if (w) {
4767 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004768 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004769 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004770 } else {
 4771 dev_err(DEV, "kmalloc(w) failed.\n");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004772 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004773 drbd_resync_finished(mdev);
4774 }
4775 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004776 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004777 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004778}
4779
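/* Consume packets we deliberately ignore on the meta socket
 * (currently only P_DELAY_PROBE, see asender_tbl below). */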
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004780static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004781{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004782 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004783}
4784
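/* Process the done_ee lists of all volumes of this connection, i.e. send
 * the acks for completed peer requests. SIGNAL_ASENDER is cleared (and
 * pending signals flushed) while we work; we loop until every list was
 * found empty under req_lock. Returns non-zero on error. */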
Philipp Reisner32862ec2011-02-08 16:41:01 +01004785static int tconn_process_done_ee(struct drbd_tconn *tconn)
4786{
Philipp Reisner082a3432011-03-15 16:05:42 +01004787 struct drbd_conf *mdev;
4788 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004789
4790 do {
4791 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4792 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004793 idr_for_each_entry(&tconn->volumes, mdev, i) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +01004794 if (drbd_process_done_ee(mdev))
Philipp Reisner082a3432011-03-15 16:05:42 +01004795 return 1; /* error */
4796 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004797 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004798
4799 spin_lock_irq(&tconn->req_lock);
4800 idr_for_each_entry(&tconn->volumes, mdev, i) {
4801 not_empty = !list_empty(&mdev->done_ee);
4802 if (not_empty)
4803 break;
4804 }
4805 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004806 } while (not_empty);
4807
4808 return 0;
4809}
4810
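/* One entry per packet type understood on the meta socket: the full
 * on-wire size of the packet (header included) and its handler. */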
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004811struct asender_cmd {
4812 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004813 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004814};
4815
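/* Dispatch table for the asender. Commands without an entry here (or with
 * a NULL handler) make drbd_asender() log an error and disconnect. */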
4816static struct asender_cmd asender_tbl[] = {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004817 [P_PING] = { sizeof(struct p_header), got_Ping },
4818 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
4819 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4820 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4821 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4822 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
4823 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4824 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4825 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
4826 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4827 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4828 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4829 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4830 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4831 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
4832 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
4833 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004834};
4835
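/* The asender thread of a connection: sends pings on request, flushes the
 * acks for completed peer requests, and receives and dispatches everything
 * arriving on the meta socket according to asender_tbl[]. Errors tear the
 * connection down (reconnect or disconnect). */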
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836int drbd_asender(struct drbd_thread *thi)
4837{
Philipp Reisner392c8802011-02-09 10:33:31 +01004838 struct drbd_tconn *tconn = thi->tconn;
Andreas Gruenbachere6ef8a52011-03-24 18:07:54 +01004839 struct p_header *h = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004840 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004841 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004842 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004843 void *buf = h;
4844 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004845 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004846 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 current->policy = SCHED_RR; /* Make this a realtime task! */
4849 current->rt_priority = 2; /* more important than all other tasks */
4850
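 /* Each iteration: send a ping if requested, cork the meta socket while
 * pending acks are flushed, uncork, then wait for the next packet. */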
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004851 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004852 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004853 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02004854 if (drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004855 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004856 goto reconnect;
4857 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004858 tconn->meta.socket->sk->sk_rcvtimeo =
4859 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004860 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004861 }
4862
Philipp Reisner32862ec2011-02-08 16:41:01 +01004863 /* TODO: conditionally cork; it may hurt latency if we cork without
4864 much to send */
4865 if (!tconn->net_conf->no_cork)
4866 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004867 if (tconn_process_done_ee(tconn)) {
4868 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004869 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004870 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004871 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004872 if (!tconn->net_conf->no_cork)
4873 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004874
4875 /* short circuit, recv_msg would return EINTR anyways. */
4876 if (signal_pending(current))
4877 continue;
4878
Philipp Reisner32862ec2011-02-08 16:41:01 +01004879 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4880 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004881
4882 flush_signals(current);
4883
4884 /* Note:
4885 * -EINTR (on meta) we got a signal
4886 * -EAGAIN (on meta) rcvtimeo expired
4887 * -ECONNRESET other side closed the connection
4888 * -ERESTARTSYS (on data) we got a signal
4889 * rv < 0 other than above: unexpected error!
4890 * rv == expected: full header or command
4891 * rv < expected: "woken" by signal during receive
4892 * rv == 0 : "connection shut down by peer"
4893 */
4894 if (likely(rv > 0)) {
4895 received += rv;
4896 buf += rv;
4897 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004898 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004899 goto reconnect;
4900 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004901 /* If the data socket received something meanwhile,
4902 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004903 if (time_after(tconn->last_received,
4904 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004905 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004906 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004907 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004908 goto reconnect;
4909 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004910 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004911 continue;
4912 } else if (rv == -EINTR) {
4913 continue;
4914 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004915 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004916 goto reconnect;
4917 }
4918
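 /* a complete header has arrived: decode it, look up the handler and
 * learn how much payload is still to be received */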
4919 if (received == expect && cmd == NULL) {
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01004920 if (decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004921 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004922 cmd = pi.cmd < ARRAY_SIZE(asender_tbl) ? &asender_tbl[pi.cmd] : NULL;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004923 if (!cmd || !cmd->fn) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004924 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004925 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004926 goto disconnect;
4927 }
4928 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004929 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004930 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004931 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004932 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004933 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004934 }
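 /* the complete packet is in: dispatch it to its handler */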
4935 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004936 bool ok;
 4937
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004938 ok = cmd->fn(tconn, &pi);
 4939 if (!ok) {
4940 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004942 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004943
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004944 tconn->last_received = jiffies;
4945
Lars Ellenbergf36af182011-03-09 22:44:55 +01004946 /* the idle_timeout (ping-int)
4947 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004948 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004949 ping_timeout_active = 0;
4950
Philipp Reisnerb411b362009-09-25 16:07:19 -07004951 buf = h;
4952 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004953 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004954 cmd = NULL;
4955 }
4956 }
4957
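/* The error paths above jump here; the if (0) keeps these labels out of
 * the normal flow while sharing the cleanup below. */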
4958 if (0) {
4959reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004960 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004961 }
4962 if (0) {
4963disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004964 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004965 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004966 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004967
Philipp Reisner32862ec2011-02-08 16:41:01 +01004968 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004969
4970 return 0;
4971}