blob: cca2da70276ed8f40584037b0d9c0b858df69be6 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded header of a received packet, filled in by the receive path
 * before the payload is dispatched to a handler. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type from the wire header */
	int size;		/* payload size in bytes */
	int vnr;		/* volume number -- presumably the per-connection
				 * device index; verify against callers */
};
56
/* Result of drbd_may_finish_epoch() -- what happened to the epoch object.
 * NOTE(review): names suggest: still referenced / freed / reused; confirm
 * against drbd_may_finish_epoch() (not visible in this chunk). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
/* Discriminator: does an operation target a single device (MDEV) or the
 * whole connection (CONN)? */
enum mdev_or_conn {
	MDEV,
	CONN,
};
67
Philipp Reisner65d11ed2011-02-07 17:35:59 +010068static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010069static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010070static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
72static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010073static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisnerb411b362009-09-25 16:07:19 -070075
76#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
77
Lars Ellenberg45bb9122010-05-14 17:10:48 +020078/*
79 * some helper functions to deal with single linked page lists,
80 * page->private being our "next" pointer.
81 */
82
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages down the chain.  If we break out via --n == 0, tmp
	 * points at the first page NOT taken (possibly NULL), which becomes
	 * the new head below. */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break;	/* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
117
/* May be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 * Optionally reports the chain length via @len. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
/* Prepend the chain [chain_first .. chain_last] to *head.
 * Locking is the responsibility of the caller (drbd_pp_lock for the
 * global pool). */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify chain_last really is the tail of chain_first */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
156
/* Grab @number pages from the preallocated drbd_pp_pool if possible,
 * otherwise try to allocate fresh pages from the kernel (non-blocking).
 * Returns a page chain linked via page->private, or NULL if @number pages
 * could not be obtained right now.  A partially built chain is given back
 * to the global pool rather than freed. */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the freshly allocated page in front of the chain */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon".
	 * Donate the partial chain to the global pool so the retry can
	 * pick it up cheaply. */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
201
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
203{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100204 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 struct list_head *le, *tle;
206
207 /* The EEs are always appended to the end of the list. Since
208 they are sent in order over the wire, they have to finish
209 in order. As soon as we see the first not finished we can
210 stop to examine the list... */
211
212 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100213 peer_req = list_entry(le, struct drbd_peer_request, w.list);
214 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 break;
216 list_move(le, to_be_freed);
217 }
218}
219
220static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
221{
222 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224
Philipp Reisner87eeee42011-01-19 14:16:30 +0100225 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100227 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100229 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
230 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700231}
232
/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private, or NULL if interrupted by
 * a signal or if @retry is false and no pages were available.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* try to make progress by reclaiming completed net EEs */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		/* sleep until drbd_pp_free() wakes drbd_pp_wait */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages only on success */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
282
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	/* pick the accounting counter matching the chain's origin */
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* keep the global pool bounded: above the high-water mark, give the
	 * pages back to the system instead of hoarding them */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* allocators may be sleeping in drbd_pp_alloc() */
	wake_up(&drbd_pp_wait);
}
308
309/*
310You need to hold the req_lock:
311 _drbd_wait_ee_list_empty()
312
313You must not have the req_lock:
314 drbd_free_ee()
315 drbd_alloc_ee()
316 drbd_init_ee()
317 drbd_release_ee()
318 drbd_ee_fix_bhs()
319 drbd_process_done_ee()
320 drbd_clear_done_ee()
321 drbd_wait_ee_list_empty()
322*/
323
/* Allocate a peer request ("EE") descriptor plus a page chain large enough
 * to hold @data_size bytes of payload.
 * Returns NULL on allocation failure or injected fault; on success the
 * caller owns the returned object and must release it via drbd_free_ee()
 * (or a sibling free helper). */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round the payload up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault-injection hook for testing */
	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* only retry the page allocation if the caller allows blocking */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
369
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100370void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100371 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 if (peer_req->flags & EE_HAS_DIGEST)
374 kfree(peer_req->digest);
375 drbd_pp_free(mdev, peer_req->pages, is_net);
376 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
377 D_ASSERT(drbd_interval_empty(&peer_req->i));
378 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379}
380
381int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
382{
383 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100384 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200386 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387
Philipp Reisner87eeee42011-01-19 14:16:30 +0100388 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100390 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
393 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700394 count++;
395 }
396 return count;
397}
398
399
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 * Returns 0, or the first non-zero error returned by a callback.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	/* detach both lists under the lock, process them outside of it */
	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* remember only the first error */
		drbd_free_ee(mdev, peer_req);
	}
	/* waiters in _drbd_wait_ee_list_empty() may proceed now */
	wake_up(&mdev->ee_wait);

	return err;
}
439
/* Wait until @head becomes empty.  Caller must hold mdev->tconn->req_lock;
 * the lock is dropped while sleeping and reacquired before each recheck
 * and before returning. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
454
455void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
456{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100457 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700458 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100459 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700460}
461
/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* @what is updated before each step, so on failure the caller can
	 * log exactly which operation went wrong. */
	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		/* don't leak the half-created socket */
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;

out:
	return err;
}
492
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100493static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700494{
495 mm_segment_t oldfs;
496 struct kvec iov = {
497 .iov_base = buf,
498 .iov_len = size,
499 };
500 struct msghdr msg = {
501 .msg_iovlen = 1,
502 .msg_iov = (struct iovec *)&iov,
503 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
504 };
505 int rv;
506
507 oldfs = get_fs();
508 set_fs(KERNEL_DS);
509 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
510 set_fs(oldfs);
511
512 return rv;
513}
514
/* Receive exactly @size bytes from the data socket of @tconn.
 * Blocks until everything arrived, the peer closed the connection, a hard
 * error occurred, or a signal interrupted a partial read.  On any short
 * result the connection state is forced to C_BROKEN_PIPE.
 * Returns the number of bytes received (== @size on success), 0 on orderly
 * shutdown, or a negative error code. */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	/* any short read means the stream is no longer usable */
	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
567
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200568/* quoting tcp(7):
569 * On individual connections, the socket buffer size must be set prior to the
570 * listen(2) or connect(2) calls in order to have it take effect.
571 * This is our wrapper to do so.
572 */
573static void drbd_setbufsize(struct socket *sock, unsigned int snd,
574 unsigned int rcv)
575{
576 /* open coded SO_SNDBUF, SO_RCVBUF */
577 if (snd) {
578 sock->sk->sk_sndbuf = snd;
579 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
580 }
581 if (rcv) {
582 sock->sk->sk_rcvbuf = rcv;
583 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
584 }
585}
586
/* Actively try to establish a TCP connection to the peer.
 * Returns the connected socket, or NULL on failure.  Transient failures
 * (timeout, peer not yet reachable, signal) leave the connection state in
 * C_WF_CONNECTION so the caller may retry; any other error forces
 * C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
664
/* Passively wait for the peer to connect to us.
 * Returns the established socket, or NULL on error/timeout.  The listening
 * socket is always released before returning.  Timeout/interrupt is
 * expected here (the active and passive attempts race), so only other
 * errors force C_DISCONNECTING. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	/* jitter the accept timeout so both nodes don't retry in lock-step */
	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
				  (struct sockaddr *) tconn->net_conf->my_addr,
				  tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
713
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100714static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100716 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700717
Andreas Gruenbacherecf23632011-03-15 23:48:25 +0100718 return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700719}
720
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100721static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700722{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100723 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700724 int rr;
725
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100726 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100728 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700729 return be16_to_cpu(h->command);
730
731 return 0xffff;
732}
733
734/**
735 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736 * @sock: pointer to the pointer to the socket.
737 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100738static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700739{
740 int rr;
741 char tb[4];
742
743 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100746 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700747
748 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100749 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700750 } else {
751 sock_release(*sock);
752 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100753 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700754 }
755}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	/* fresh connection: restart sequence numbering */
	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	/* Protocol < 100 serializes state changes per connection, newer
	 * protocols per device.  NOTE(review): inferred from this version
	 * check alone -- confirm against the state-change code. */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	/* The drbd_send_*() helpers return 0 on success.  The order of these
	 * packets is part of the connect handshake -- do not reorder. */
	ok &= !drbd_send_sync_param(mdev);
	ok &= !drbd_send_sizes(mdev, 0, 0);
	ok &= !drbd_send_uuids(mdev);
	ok &= !drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);


	/* 0 on success -- presumably an idr_for_each()-style callback
	 * convention; verify against the caller. */
	return !ok;
}
780
/*
 * drbd_connect() - establish the two TCP connections (data and meta) to the peer
 *
 * return values:
 *	 1 yes, we have a valid connection
 *	 0 oops, did not work out, please try again
 *	-1 peer talks different language,
 *	   no point in trying again, please go standalone.
 *	-2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	/* Both sides connect actively AND accept passively, until each end
	 * holds one "data" socket (sock) and one "meta" socket (msock). */
	sock = NULL;
	msock = NULL;

	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* Announce which role this outgoing socket plays;
			 * the first becomes data, the second meta. */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* wait a bit, then re-verify both sockets are still alive */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* Passive side: accept a connection initiated by the peer and
		 * classify it by the first packet it sends. */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* retry with probability 1/2 — presumably a
				 * tie-breaker when both nodes race; confirm */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	/* negotiate the protocol version; <= 0 is failure, pass it up */
	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* handshake done: switch the data socket to its operational timeouts */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	/* run drbd_connected() on every volume of this connection */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
950
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100951static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100953 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100954 pi->cmd = be16_to_cpu(h->h80.command);
955 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100956 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100957 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100958 pi->cmd = be16_to_cpu(h->h95.command);
959 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
960 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200961 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100962 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200963 be32_to_cpu(h->h80.magic),
964 be16_to_cpu(h->h80.command),
965 be16_to_cpu(h->h80.length));
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100966 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700967 }
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100968 return 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100969}
970
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100971static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100972{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 struct p_header *h = &tconn->data.rbuf.header;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100974 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100975
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100976 err = drbd_recv(tconn, h, sizeof(*h));
977 if (unlikely(err != sizeof(*h))) {
Philipp Reisner257d0af2011-01-26 12:15:29 +0100978 if (!signal_pending(current))
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100979 conn_warn(tconn, "short read expecting header on sock: r=%d\n", err);
980 if (err >= 0)
981 err = -EIO;
982 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100983 }
984
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100985 err = decode_header(tconn, h, pi);
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100986 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100988 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700989}
990
Philipp Reisner2451fc32010-08-24 13:43:11 +0200991static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992{
993 int rv;
994
995 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400996 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200997 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700998 if (rv) {
999 dev_err(DEV, "local disk flush failed with status %d\n", rv);
1000 /* would rather check on EOPNOTSUPP, but that is not reliable.
1001 * don't try again for ANY return value != 0
1002 * if (rv == -EOPNOTSUPP) */
1003 drbd_bump_write_ordering(mdev, WO_drain_io);
1004 }
1005 put_ldev(mdev);
1006 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001007}
1008
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is "finished" once it has a barrier number, a non-zero size,
 * and no active requests; finishing sends the barrier ack (unless
 * EV_CLEANUP) and either frees the epoch or recycles the current one.
 * Finishing one epoch may make its successor finishable, so this loops
 * down the epoch list.  Returns FE_STILL_LIVE, FE_DESTROYED, or
 * FE_RECYCLED (the first non-STILL_LIVE outcome wins).
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier flag */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* epoch complete? (has data, no active requests, barrier known) */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the spinlock around the network send */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* not the newest epoch: unlink and free it,
				 * then continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* newest epoch: reset it for reuse instead */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1080
1081/**
1082 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1083 * @mdev: DRBD device.
1084 * @wo: Write ordering method to try.
1085 */
1086void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1087{
1088 enum write_ordering_e pwo;
1089 static char *write_ordering_str[] = {
1090 [WO_none] = "none",
1091 [WO_drain_io] = "drain",
1092 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093 };
1094
1095 pwo = mdev->write_ordering;
1096 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1098 wo = WO_drain_io;
1099 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1100 wo = WO_none;
1101 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001102 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001103 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1104}
1105
/**
 * drbd_submit_peer_request() - submit a peer request's pages to the local disk
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 * @fault_type: fault-injection class passed to drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked (via bi_next) list of prepared bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the list; submission below re-reverses nothing —
	 * order of independent bios does not matter here */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	/* pack the remaining pages of the chain into this bio */
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* this bio is full; start another at the current page */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;	/* advance in 512-byte sectors */
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* all bios prepared; publish the count before submitting any,
	 * so the completion handler sees the final value */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; just drop the prepared bios */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1202
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001203static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001204 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001205{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001206 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001207
1208 drbd_remove_interval(&mdev->write_requests, i);
1209 drbd_clear_interval(i);
1210
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001211 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001212 if (i->waiting)
1213 wake_up(&mdev->misc_wait);
1214}
1215
/* Handle an incoming P_BARRIER packet: stamp the current epoch with the
 * barrier number, try to finish it, and — depending on the write ordering
 * method — either start a fresh epoch or wait/flush until the current one
 * has drained.  Returns true on success, false on an inconsistent
 * write_ordering value. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* drain all in-flight peer writes, then flush the disk */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch available (or not needed): wait until the
		 * current epoch is fully drained, then reuse it */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* we reach here only with a freshly allocated epoch: initialize it
	 * and install it as the new current epoch */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1287
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receive @data_size bytes (optionally preceded by an integrity digest)
 * from the data socket into a freshly allocated peer request destined for
 * @sector.  Returns the peer request, or NULL on short read, bad size,
 * out-of-range sector, allocation failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest only present with protocol >= 87 and integrity checking on */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	/* the digest was counted in the wire size; payload is the rest */
	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload page by page into the peer request's page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
				rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* verify the received payload against the peer's digest */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1375
1376/* drbd_drain_block() just takes a data block
1377 * out of the socket input buffer, and discards it.
1378 */
1379static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1380{
1381 struct page *page;
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001382 int rr, err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383 void *data;
1384
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001385 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001386 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001387
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001388 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001389
1390 data = kmap(page);
1391 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001392 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1393
1394 rr = drbd_recv(mdev->tconn, data, len);
1395 if (rr != len) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001396 if (!signal_pending(current))
1397 dev_warn(DEV,
1398 "short read receiving data: read %d expected %d\n",
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001399 rr, len);
1400 err = (rr < 0) ? rr : -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001401 break;
1402 }
1403 data_size -= rr;
1404 }
1405 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001406 drbd_pp_free(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001407 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408}
1409
/* Receive a "diskless read" data reply directly into the pages of the
 * original request's master bio (optionally verifying an integrity
 * digest sent ahead of the payload).
 * Returns 0 on success, or a negative error code on short read or
 * digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest only present with protocol >= 87 and integrity checking on */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return rr < 0 ? rr : -EIO;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* fill each bio segment directly from the socket */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return rr < 0 ? rr : -EIO;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* verify the payload we just wrote into the bio */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(data_size == 0);
	return 0;
}
1469
1470/* e_end_resync_block() is called via
1471 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001472static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001474 struct drbd_peer_request *peer_req =
1475 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001476 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001477 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001478 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001480 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001482 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1483 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001484 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485 } else {
1486 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001487 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001489 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 }
1491 dec_unacked(mdev);
1492
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001493 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494}
1495
/*
 * Receive one resync data block from the socket and submit it as a WRITE
 * to the local disk.  The caller holds a local-disk reference, which is
 * released here on failure (or later in the endio path on success — the
 * __releases(local) annotation below).
 *
 * Returns true if the write was submitted, false on receive/submit error.
 */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* make the request visible on sync_ee before submitting */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* account resync sectors for the resync throttling logic */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1531
/*
 * Look up the local request a peer packet refers to.  The wire @id is the
 * pointer value of our own request that we sent to the peer earlier; it is
 * only trusted after verifying that its interval is actually present in
 * @root at @sector (and is a local request).
 *
 * Returns the request, or NULL if not found (logging an error unless
 * @missing_ok).
 */
static struct drbd_request *
find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	/* validate the peer-supplied pointer against the interval tree
	 * before using it any further */
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1548
/*
 * Handle a P_DATA_REPLY packet: the peer answers a read we forwarded to it
 * (diskless read).  Locate the originating request and receive the payload
 * straight into its bio.  Returns true on success, false on failure
 * (the connection will be torn down by the caller in that case).
 */
static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct drbd_request *req;
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return false;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	ok = !recv_dless_read(mdev, req, sector, data_size);

	if (ok)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return ok;
}
1578
/*
 * Handle a P_RS_DATA_REPLY packet: the peer answers one of our resync read
 * requests.  If we have a local disk, receive and submit the block via
 * recv_resync_read(); otherwise drain the payload from the socket and
 * negatively acknowledge.  Returns true on success, false on failure.
 */
static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(p->block_id == ID_SYNCER);

	if (get_ldev(mdev)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		ok = recv_resync_read(mdev, sector, data_size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not write resync data to local disk.\n");

		/* still consume the payload so the stream stays in sync */
		ok = !drbd_drain_block(mdev, data_size);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
	}

	/* account incoming resync sectors for resync-rate control */
	atomic_add(data_size >> 9, &mdev->rs_sect_in);

	return ok;
}
1607
/*
 * Worker callback: resubmit a postponed local write (one that lost a
 * concurrent-write conflict and must be retried).  Snapshots the master
 * bio and start time under req_lock, detaches them from the old request
 * via DISCARD_WRITE, then feeds the bio back into __drbd_make_request().
 * Returns 0 on success, -EIO if the request was unexpectedly not
 * postponed.
 */
static int w_restart_write(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct bio *bio;
	unsigned long start_time;
	unsigned long flags;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	if (!expect(req->rq_state & RQ_POSTPONED)) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		return -EIO;
	}
	bio = req->master_bio;
	start_time = req->start_time;
	/* Postponed requests will not have their master_bio completed! */
	__req_mod(req, DISCARD_WRITE, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	/* __drbd_make_request() may ask us to retry until it accepts the bio */
	while (__drbd_make_request(mdev, bio, start_time))
		/* retry */ ;
	return 0;
}
1631
/*
 * Queue a w_restart_write for every postponed local write request that
 * overlaps [sector, sector+size).  Called from e_end_block() with
 * tconn->req_lock held (the interval tree walk relies on that).
 * Requests still RQ_LOCAL_PENDING or not RQ_POSTPONED are skipped.
 */
static void restart_conflicting_writes(struct drbd_conf *mdev,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		if (expect(list_empty(&req->w.list))) {
			req->w.mdev = mdev;
			req->w.cb = w_restart_write;
			drbd_queue_work(&mdev->tconn->data.work, &req->w);
		}
	}
}
1652
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 */
/*
 * Completion callback for a mirrored write received from the peer.
 * For protocol C, send the appropriate ack (P_RS_WRITE_ACK / P_WRITE_ACK
 * on success, P_NEG_ACK on local write error).  Afterwards remove the
 * request from the conflict-detection interval tree (dual-primary only)
 * and let the epoch bookkeeping make progress.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during resync, a successful write may also mark
			 * the area in sync */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1696
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001697static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001699 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001700 struct drbd_peer_request *peer_req =
1701 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001702 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001704 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705 dec_unacked(mdev);
1706
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001707 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001708}
1709
/* Worker callback: acknowledge a discarded (lost-conflict) peer write. */
static int e_send_discard_write(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_DISCARD_WRITE);
}
1714
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001715static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001716{
1717 struct drbd_tconn *tconn = w->mdev->tconn;
1718
1719 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1720 P_RETRY_WRITE : P_DISCARD_WRITE);
1721}
1722
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001723static bool seq_greater(u32 a, u32 b)
1724{
1725 /*
1726 * We assume 32-bit wrap-around here.
1727 * For 24-bit wrap-around, we would have to shift:
1728 * a <<= 8; b <<= 8;
1729 */
1730 return (s32)a - (s32)b > 0;
1731}
1732
1733static u32 seq_max(u32 a, u32 b)
1734{
1735 return seq_greater(a, b) ? a : b;
1736}
1737
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001738static bool need_peer_seq(struct drbd_conf *mdev)
1739{
1740 struct drbd_tconn *tconn = mdev->tconn;
1741
1742 /*
1743 * We only need to keep track of the last packet_seq number of our peer
1744 * if we are in dual-primary mode and we have the discard flag set; see
1745 * handle_write_conflicts().
1746 */
1747 return tconn->net_conf->two_primaries &&
1748 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1749}
1750
/*
 * Fold @peer_seq into mdev->peer_seq (wrap-around aware maximum) under
 * peer_seq_lock, and wake waiters in wait_for_and_update_peer_seq() —
 * but only when this packet actually advanced peer_seq.  No-op unless
 * peer-sequence tracking is needed (see need_peer_seq()).
 */
static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
{
	unsigned int newest_peer_seq;

	if (need_peer_seq(mdev)) {
		spin_lock(&mdev->peer_seq_lock);
		newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
		mdev->peer_seq = newest_peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		/* wake up only if we actually changed mdev->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&mdev->seq_wait);
	}
}
1765
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	/* nothing to synchronize against unless dual-primary with discard flag */
	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* are we the logically next packet (or already behind)? */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* wait for the missing ack packets on msock; drop the lock
		 * while sleeping, retake it before re-checking */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1822
Lars Ellenberg688593c2010-11-17 22:25:03 +01001823/* see also bio_flags_to_wire()
1824 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1825 * flags and back. We may replicate to other kernel versions. */
1826static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001827{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001828 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1829 (dpf & DP_FUA ? REQ_FUA : 0) |
1830 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1831 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001832}
1833
/*
 * Fail every postponed local write overlapping [sector, sector+size)
 * with NEG_ACKED.  Called with tconn->req_lock held; the lock is
 * dropped while completing each master bio and retaken afterwards,
 * which is why the tree walk restarts from scratch every iteration.
 */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* the tree may change while the lock is dropped below */
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
1858
/*
 * Resolve conflicts between an incoming peer write and overlapping
 * local/peer requests (dual-primary mode).  Called from receive_Data()
 * with tconn->req_lock held.
 *
 * Returns 0 when the peer request may be submitted, -ENOENT when it was
 * discarded or deferred for retry (an ack has been queued to the asender
 * in that case), or another negative error on interruption/teardown.
 * On error the peer request is removed from the interval tree again.
 */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* the node with the discard flag decides which side wins a conflict */
	bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be discarded; otherwise,
			 * it will be retried once all overlapping requests
			 * have completed.
			 */
			bool discard = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  discard ? "local" : "remote");

			/* the queued ack (sent by the asender) drops this again */
			inc_unacked(mdev);
			peer_req->w.cb = discard ? e_send_discard_write :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request will be discarded or
				 * retried. Requests that are discarded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
1966
/* mirrored write */
/*
 * Handle a P_DATA packet: receive a mirrored write from the peer, enter
 * it into the current epoch, resolve write conflicts (dual-primary),
 * acknowledge per wire protocol (A: nothing, B: P_RECV_ACK now,
 * C: ack from e_end_block() after the local write), and submit it to
 * the local disk.  Without a local disk the payload is drained and
 * negatively acknowledged.  Returns true on success, false on failure.
 */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err;


	if (!get_ldev(mdev)) {
		/* no local disk: keep the sequence bookkeeping honest,
		 * consume the payload, and negatively ack */
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return !drbd_drain_block(mdev, data_size) && err == 0;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the current write epoch */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	if (mdev->tconn->net_conf->two_primaries) {
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&mdev->tconn->req_lock);
		err = handle_write_conflicts(mdev, peer_req);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			/* -ENOENT: request discarded/deferred, already acked */
			if (err == -ENOENT) {
				put_ldev(mdev);
				return true;
			}
			goto out_interrupted;
		}
	} else
		spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2075
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002076/* We may throttle resync, if the lower device seems to be busy,
2077 * and current sync rate is above c_min_rate.
2078 *
2079 * To decide whether or not the lower device is busy, we use a scheme similar
2080 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2081 * (more than 64 sectors) of activity we cannot account for with our own resync
2082 * activity, it obviously is "busy".
2083 *
2084 * The current sync rate used here uses only the most recent two step marks,
2085 * to have a short time average so we can react faster.
2086 */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->ldev->dc.c_min_rate == 0)
		return 0;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* Total sectors read+written on the backing device, minus the sector
	 * events our own resync generated (rs_sect_ev); what remains is
	 * activity we cannot account for, i.e. application IO. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	/* Only recompute the sync rate if "significant" (> 64 sectors)
	 * unaccounted activity happened since the last sample. */
	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* online verify tracks progress in ov_left; a real resync
		 * uses the remaining bitmap weight minus failed sectors */
		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;	/* avoid division by zero below */
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only while we are faster than the configured floor */
		if (dbdt > mdev->ldev->dc.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2141
2142
/* Serve a peer read request (P_DATA_REQUEST, P_RS_DATA_REQUEST,
 * P_CSUM_RS_REQUEST, P_OV_REQUEST, P_OV_REPLY): validate the requested
 * range, allocate a peer request, and submit a READ against our backing
 * device; the per-command w.cb callback sends the reply when it completes.
 * @digest_size: size of the digest payload following the request header;
 *               also the amount we must drain from the socket if we cannot
 *               serve the request.
 * Returns true on successful submission, false to trigger a re-connect.
 */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* reject nonsensical sizes: non-positive, not sector aligned,
	 * or larger than our maximum bio size */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	/* reject requests reaching beyond the end of the device */
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* Without an up-to-date local disk we cannot serve the read;
	 * send the matching negative ack and drain the payload. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return !drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	/* pick the completion callback and fault-injection type per command */
	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* both carry a digest payload that we must receive now */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		/* digest bytes live directly behind the digest_info header */
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first P_OV_REQUEST (with protocol >= 90): initialize the
		 * online-verify bookkeeping and rate-mark history */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
		    cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2329
/* Auto-resolve a split brain when neither node is currently Primary
 * (called with pcount == 0 from drbd_sync_handshake()).
 * Applies the configured after-sb-0p policy.
 * Returns  1: peer loses, we become sync source,
 *         -1: we lose, we become sync target,
 *       -100: no automatic decision (disconnect / misconfiguration). */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	/* low bit of each node's bitmap UUID; used by the younger/older
	 * primary policies below to decide who was primary last.
	 * NOTE(review): assumes this bit flags "was primary" -- confirm
	 * against the UUID generation code. */
	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	/* amount of changed data on each side: peer's count arrives in the
	 * UI_SIZE slot of p_uuid, ours is the communicated bitmap weight */
	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these policies only make sense with at least one primary */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* nothing changed on either side; break the tie via
			 * the DISCARD_CONCURRENT flag so both nodes agree */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* when reached by fall-through, continue into least-changes;
		 * when configured as zero-changes, stop undecided here */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2401
/* Auto-resolve a split brain when exactly one node is currently Primary
 * (called with pcount == 1 from drbd_sync_handshake()).
 * Applies the configured after-sb-1p policy; most policies delegate to
 * drbd_asb_recover_0p() and then constrain the result by our role.
 * Return values as for drbd_asb_recover_0p(): 1 / -1 / -100. */
static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
{
	int hg, rv = -100;

	switch (mdev->tconn->net_conf->after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
		/* these are after-sb-0p policies; invalid here */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* accept the 0p decision only if it discards the secondary */
		hg = drbd_asb_recover_0p(mdev);
		if (hg == -1 && mdev->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1 && mdev->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(mdev);
		break;
	case ASB_DISCARD_SECONDARY:
		return mdev->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(mdev);
		if (hg == -1 && mdev->state.role == R_PRIMARY) {
			enum drbd_state_rv rv2;

			/* we lost while Primary: try to give up the role */
			drbd_set_role(mdev, R_SECONDARY, 0);
			/* NOTE(review): the forced drbd_set_role() above looks
			 * redundant with the drbd_change_state() below, which
			 * attempts the same demotion (compare
			 * drbd_asb_recover_2p(), which omits it) -- confirm. */
			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				/* demotion failed: let the helper script decide */
				drbd_khelper(mdev, "pri-lost-after-sb");
			} else {
				dev_warn(DEV, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
2450
2451static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2452{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002453 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454
Philipp Reisner89e58e72011-01-19 13:12:45 +01002455 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002456 case ASB_DISCARD_YOUNGER_PRI:
2457 case ASB_DISCARD_OLDER_PRI:
2458 case ASB_DISCARD_LEAST_CHG:
2459 case ASB_DISCARD_LOCAL:
2460 case ASB_DISCARD_REMOTE:
2461 case ASB_CONSENSUS:
2462 case ASB_DISCARD_SECONDARY:
2463 dev_err(DEV, "Configuration error.\n");
2464 break;
2465 case ASB_VIOLENTLY:
2466 rv = drbd_asb_recover_0p(mdev);
2467 break;
2468 case ASB_DISCONNECT:
2469 break;
2470 case ASB_CALL_HELPER:
2471 hg = drbd_asb_recover_0p(mdev);
2472 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002473 enum drbd_state_rv rv2;
2474
Philipp Reisnerb411b362009-09-25 16:07:19 -07002475 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2476 * we might be here in C_WF_REPORT_PARAMS which is transient.
2477 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002478 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2479 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002480 drbd_khelper(mdev, "pri-lost-after-sb");
2481 } else {
2482 dev_warn(DEV, "Successfully gave up primary role.\n");
2483 rv = hg;
2484 }
2485 } else
2486 rv = hg;
2487 }
2488
2489 return rv;
2490}
2491
2492static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2493 u64 bits, u64 flags)
2494{
2495 if (!uuid) {
2496 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2497 return;
2498 }
2499 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2500 text,
2501 (unsigned long long)uuid[UI_CURRENT],
2502 (unsigned long long)uuid[UI_BITMAP],
2503 (unsigned long long)uuid[UI_HISTORY_START],
2504 (unsigned long long)uuid[UI_HISTORY_END],
2505 (unsigned long long)bits,
2506 (unsigned long long)flags);
2507}
2508
2509/*
2510 100 after split brain try auto recover
2511 2 C_SYNC_SOURCE set BitMap
2512 1 C_SYNC_SOURCE use BitMap
2513 0 no Sync
2514 -1 C_SYNC_TARGET use BitMap
2515 -2 C_SYNC_TARGET set BitMap
2516 -100 after split brain, disconnect
2517-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002518-1091 requires proto 91
2519-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002520 */
/* Compare our data-generation UUIDs with the peer's and derive the sync
 * decision.  The rule that matched is stored in *rule_nr for logging.
 * Return codes are documented in the table directly above this function
 * (0: no sync, +/-1: use bitmap, +/-2: full sync, 100/-100: split brain,
 * -1000: unrelated data, -109x: peer protocol too old). */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* low bit of the current UUID is masked off before comparing */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	/* rule 10: both sides freshly created -> nothing to sync */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* rule 20: we are fresh/empty, peer has data -> full sync target */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* rule 30: peer is fresh/empty, we have data -> full sync source */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* peer has no bitmap UUID but we do: we were sync source and
		 * may have missed the "resync finished" event */
		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* mirror image of the case above: we lack the bitmap UUID,
		 * the peer still has one -> correct the peer's view */
		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				/* shift the peer's bitmap UUID into its history */
				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both crashed as primary: break the tie via the
			 * DISCARD_CONCURRENT flag so both nodes agree */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	/* rule 50: our current UUID matches the peer's bitmap UUID ->
	 * peer is ahead of us, we become sync target */
	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* rule 60: our current UUID appears in the peer's history ->
	 * peer is far ahead, full sync target */
	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* rule 70: peer's current UUID matches our bitmap UUID ->
	 * we are ahead, sync source using the bitmap */
	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	/* rule 80: peer's current UUID appears in our history ->
	 * we are far ahead, full sync source */
	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* rule 90: identical non-zero bitmap UUIDs -> split brain,
	 * try automatic recovery */
	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* rule 100: any overlap between the two histories ->
	 * split brain, no automatic recovery */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* no relation between the UUID sets at all */
	return -1000;
}
2700
2701/* drbd_sync_handshake() returns the new conn state on success, or
2702 CONN_MASK (-1) on failure.
2703 */
2704static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2705 enum drbd_disk_state peer_disk) __must_hold(local)
2706{
2707 int hg, rule_nr;
2708 enum drbd_conns rv = C_MASK;
2709 enum drbd_disk_state mydisk;
2710
2711 mydisk = mdev->state.disk;
2712 if (mydisk == D_NEGOTIATING)
2713 mydisk = mdev->new_state_tmp.disk;
2714
2715 dev_info(DEV, "drbd_sync_handshake:\n");
2716 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2717 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2718 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2719
2720 hg = drbd_uuid_compare(mdev, &rule_nr);
2721
2722 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2723
2724 if (hg == -1000) {
2725 dev_alert(DEV, "Unrelated data, aborting!\n");
2726 return C_MASK;
2727 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002728 if (hg < -1000) {
2729 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002730 return C_MASK;
2731 }
2732
2733 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2734 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2735 int f = (hg == -100) || abs(hg) == 2;
2736 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2737 if (f)
2738 hg = hg*2;
2739 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2740 hg > 0 ? "source" : "target");
2741 }
2742
Adam Gandelman3a11a482010-04-08 16:48:23 -07002743 if (abs(hg) == 100)
2744 drbd_khelper(mdev, "initial-split-brain");
2745
Philipp Reisner89e58e72011-01-19 13:12:45 +01002746 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 int pcount = (mdev->state.role == R_PRIMARY)
2748 + (peer_role == R_PRIMARY);
2749 int forced = (hg == -100);
2750
2751 switch (pcount) {
2752 case 0:
2753 hg = drbd_asb_recover_0p(mdev);
2754 break;
2755 case 1:
2756 hg = drbd_asb_recover_1p(mdev);
2757 break;
2758 case 2:
2759 hg = drbd_asb_recover_2p(mdev);
2760 break;
2761 }
2762 if (abs(hg) < 100) {
2763 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2764 "automatically solved. Sync from %s node\n",
2765 pcount, (hg < 0) ? "peer" : "this");
2766 if (forced) {
2767 dev_warn(DEV, "Doing a full sync, since"
2768 " UUIDs where ambiguous.\n");
2769 hg = hg*2;
2770 }
2771 }
2772 }
2773
2774 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002775 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002777 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002778 hg = 1;
2779
2780 if (abs(hg) < 100)
2781 dev_warn(DEV, "Split-Brain detected, manually solved. "
2782 "Sync from %s node\n",
2783 (hg < 0) ? "peer" : "this");
2784 }
2785
2786 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002787 /* FIXME this log message is not correct if we end up here
2788 * after an attempted attach on a diskless node.
2789 * We just refuse to attach -- well, we drop the "connection"
2790 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002791 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002792 drbd_khelper(mdev, "split-brain");
2793 return C_MASK;
2794 }
2795
2796 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2797 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2798 return C_MASK;
2799 }
2800
2801 if (hg < 0 && /* by intention we do not use mydisk here. */
2802 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002803 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002804 case ASB_CALL_HELPER:
2805 drbd_khelper(mdev, "pri-lost");
2806 /* fall through */
2807 case ASB_DISCONNECT:
2808 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2809 return C_MASK;
2810 case ASB_VIOLENTLY:
2811 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2812 "assumption\n");
2813 }
2814 }
2815
Philipp Reisner8169e412011-03-15 18:40:27 +01002816 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002817 if (hg == 0)
2818 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2819 else
2820 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2821 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2822 abs(hg) >= 2 ? "full" : "bit-map based");
2823 return C_MASK;
2824 }
2825
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 if (abs(hg) >= 2) {
2827 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002828 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2829 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002830 return C_MASK;
2831 }
2832
2833 if (hg > 0) { /* become sync source. */
2834 rv = C_WF_BITMAP_S;
2835 } else if (hg < 0) { /* become sync target */
2836 rv = C_WF_BITMAP_T;
2837 } else {
2838 rv = C_CONNECTED;
2839 if (drbd_bm_total_weight(mdev)) {
2840 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2841 drbd_bm_total_weight(mdev));
2842 }
2843 }
2844
2845 return rv;
2846}
2847
2848/* returns 1 if invalid */
2849static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2850{
2851 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2852 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2853 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2854 return 0;
2855
2856 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2857 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2858 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2859 return 1;
2860
2861 /* everything else is valid if they are equal on both sides. */
2862 if (peer == self)
2863 return 0;
2864
2865 /* everything es is invalid. */
2866 return 1;
2867}
2868
/*
 * receive_protocol() - validate the peer's P_PROTOCOL packet against our
 * own net_conf.
 *
 * All numeric fields arrive in big-endian wire order (be32_to_cpu).  Any
 * incompatibility (wire protocol, after-split-brain policies, two-primaries,
 * want_lose on both sides, or differing data-integrity-alg) drops the
 * connection via the disconnect label, which requests C_DISCONNECTING.
 *
 * Returns true on success; false on a short read of the integrity-alg
 * string or after requesting disconnect.
 */
static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* decode the fixed-size part of the packet */
	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* mirror the peer's dry-run request into our connection flags */
	clear_bit(CONN_DRY_RUN, &tconn->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &tconn->flags);

	if (p_proto != tconn->net_conf->wire_protocol) {
		conn_err(tconn, "incompatible communication protocols\n");
		goto disconnect;
	}

	/* the after-split-brain policies must be compatible pairs,
	 * see cmp_after_sb() */
	if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
		conn_err(tconn, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
		conn_err(tconn, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
		conn_err(tconn, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* at most one side may volunteer to lose its data */
	if (p_want_lose && tconn->net_conf->want_lose) {
		conn_err(tconn, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != tconn->net_conf->two_primaries) {
		conn_err(tconn, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	/* peers speaking protocol >= 87 append the integrity-alg name;
	 * read it from the socket and compare with ours */
	if (tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = tconn->net_conf->integrity_alg;

		if (drbd_recv(tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* force NUL termination in case the peer sent garbage */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		conn_info(tconn, "data-integrity-alg: %s\n",
			  my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	return false;
}
2941
2942/* helper function
2943 * input: alg name, feature name
2944 * return: NULL (alg name was "")
2945 * ERR_PTR(error) if something goes wrong
2946 * or the crypto hash ptr, if it worked out ok. */
2947struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2948 const char *alg, const char *name)
2949{
2950 struct crypto_hash *tfm;
2951
2952 if (!alg[0])
2953 return NULL;
2954
2955 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2956 if (IS_ERR(tfm)) {
2957 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2958 alg, name, PTR_ERR(tfm));
2959 return tfm;
2960 }
2961 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2962 crypto_free_hash(tfm);
2963 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2964 return ERR_PTR(-EINVAL);
2965 }
2966 return tfm;
2967}
2968
/*
 * receive_SyncParam() - process the peer's resync parameter packet.
 *
 * The packet layout depends on the agreed protocol version (apv):
 * apv <= 87: p_rs_param, apv == 88: p_rs_param + alg string,
 * apv <= 94: p_rs_param_89, apv >= 95: p_rs_param_95.
 *
 * Validates the size, reads the variable part from the socket, possibly
 * allocates new verify/csums digest transforms and a new resync-plan fifo,
 * and finally commits all of them under mdev->peer_seq_lock (which, per
 * the comment below, serializes against drbd_nl_syncer_conf()).
 *
 * Returns true on success, false on short reads or after requesting
 * disconnect.
 */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* maximum acceptable packet size for this protocol version */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* split the remainder into fixed header part and trailing data;
	 * only apv == 88 carries trailing data (the verify-alg string) */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size = packet_size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	/* take over the peer's resync rate; needs a local-disk reference */
	if (get_ldev(mdev)) {
		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
		put_ldev(mdev);
	}

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg as trailing data */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* peer proposes a different verify-alg: only acceptable
		 * during the initial parameter exchange */
		if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->tconn->net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* same for csums-alg (exists since apv 89) */
		if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->tconn->net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv >= 95 adds the dynamic resync controller settings;
		 * pre-size the resync plan fifo before taking the lock */
		if (apv > 94 && get_ldev(mdev)) {
			mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
			mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					put_ldev(mdev);
					goto disconnect;
				}
			}
			put_ldev(mdev);
		}

		/* commit phase: install the new transforms and fifo */
		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
			mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->tconn->verify_tfm);
			mdev->tconn->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
			mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->tconn->csums_tfm);
			mdev->tconn->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	return false;
}
3125
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126/* warn if the arguments differ by more than 12.5% */
3127static void warn_if_differ_considerably(struct drbd_conf *mdev,
3128 const char *s, sector_t a, sector_t b)
3129{
3130 sector_t d;
3131 if (a == 0 || b == 0)
3132 return;
3133 d = (a > b) ? (a - b) : (b - a);
3134 if (d > (a>>3) || d > (b>>3))
3135 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3136 (unsigned long long)a, (unsigned long long)b);
3137}
3138
/*
 * receive_sizes() - process the peer's P_SIZES packet.
 *
 * Stores the peer's disk size, negotiates the user-requested size
 * (minimum of both sides on initial connect), refuses to shrink a
 * device with usable data while connecting, recomputes our own device
 * size (or, if diskless, adopts the peer's), and reconsiders the
 * maximum bio size.  If sizes still differ, it answers with our sizes
 * and possibly triggers a resync after online grow.
 *
 * Returns true on success, false after requesting disconnect or on a
 * device-size error.
 */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	/* sizes arrive in big-endian wire order */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember the old value so we can roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
			     (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
			/* roll back the u_size change from above */
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* detect a changed backing device size */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3236
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003237static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3238 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003240 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003242 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3245
3246 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3247 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3248
3249 kfree(mdev->p_uuid);
3250 mdev->p_uuid = p_uuid;
3251
3252 if (mdev->state.conn < C_CONNECTED &&
3253 mdev->state.disk < D_INCONSISTENT &&
3254 mdev->state.role == R_PRIMARY &&
3255 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3256 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3257 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003258 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003259 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260 }
3261
3262 if (get_ldev(mdev)) {
3263 int skip_initial_sync =
3264 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003265 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003266 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3267 (p_uuid[UI_FLAGS] & 8);
3268 if (skip_initial_sync) {
3269 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3270 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003271 "clear_n_write from receive_uuids",
3272 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3274 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3275 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3276 CS_VERBOSE, NULL);
3277 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003278 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003279 }
3280 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003281 } else if (mdev->state.disk < D_INCONSISTENT &&
3282 mdev->state.role == R_PRIMARY) {
3283 /* I am a diskless primary, the peer just created a new current UUID
3284 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003285 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003286 }
3287
3288 /* Before we test for the disk state, we should wait until an eventually
3289 ongoing cluster wide state change is finished. That is important if
3290 we are primary and are detaching from our disk. We need to see the
3291 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003292 mutex_lock(mdev->state_mutex);
3293 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003294 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003295 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3296
3297 if (updated_uuids)
3298 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003300 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301}
3302
3303/**
3304 * convert_state() - Converts the peer's view of the cluster state to our point of view
3305 * @ps: The state as seen by the peer.
3306 */
3307static union drbd_state convert_state(union drbd_state ps)
3308{
3309 union drbd_state ms;
3310
3311 static enum drbd_conns c_tab[] = {
3312 [C_CONNECTED] = C_CONNECTED,
3313
3314 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3315 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3316 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3317 [C_VERIFY_S] = C_VERIFY_T,
3318 [C_MASK] = C_MASK,
3319 };
3320
3321 ms.i = ps.i;
3322
3323 ms.conn = c_tab[ps.conn];
3324 ms.peer = ps.role;
3325 ms.role = ps.peer;
3326 ms.pdsk = ps.disk;
3327 ms.disk = ps.pdsk;
3328 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3329
3330 return ms;
3331}
3332
/*
 * receive_req_state() - handle a peer's device state-change request.
 *
 * Decodes mask/val (big-endian on the wire), rejects the request with
 * SS_CONCURRENT_ST_CHG if we hold the DISCARD_CONCURRENT role and a
 * local state change is already in flight, otherwise converts the
 * peer's view to ours, applies the change, and sends the result back.
 *
 * Always returns true (errors are reported to the peer, not upward).
 */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* concurrent state change in progress and we are the side that
	 * discards concurrent requests: tell the peer to retry */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* translate the peer's point of view into ours */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(mdev, rv);

	drbd_md_sync(mdev);

	return true;
}
3359
/*
 * receive_req_conn_state() - handle a peer's connection-wide state-change
 * request.
 *
 * Connection-scope analog of receive_req_state(): decodes mask/val,
 * rejects with SS_CONCURRENT_ST_CHG if a concurrent change holds
 * cstate_mutex and we are the discarding side, otherwise converts the
 * peer's view, applies it locally (CS_LOCAL_ONLY) and replies.
 *
 * Always returns true (errors are reported to the peer, not upward).
 */
static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd,
				  unsigned int data_size)
{
	struct p_req_state *p = &tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* concurrent state change in progress and we are the side that
	 * discards concurrent requests: tell the peer to retry */
	if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
	    mutex_is_locked(&tconn->cstate_mutex)) {
		conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* translate the peer's point of view into ours */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
	conn_send_sr_reply(tconn, rv);

	return true;
}
3384
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003385static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3386 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003388 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003389 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003391 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003392 int rv;
3393
Philipp Reisnerb411b362009-09-25 16:07:19 -07003394 peer_state.i = be32_to_cpu(p->state);
3395
3396 real_peer_disk = peer_state.disk;
3397 if (peer_state.disk == D_NEGOTIATING) {
3398 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3399 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3400 }
3401
Philipp Reisner87eeee42011-01-19 14:16:30 +01003402 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003404 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003405 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003406
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003407 /* peer says his disk is uptodate, while we think it is inconsistent,
3408 * and this happens while we think we have a sync going on. */
3409 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3410 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3411 /* If we are (becoming) SyncSource, but peer is still in sync
3412 * preparation, ignore its uptodate-ness to avoid flapping, it
3413 * will change to inconsistent once the peer reaches active
3414 * syncing states.
3415 * It may have changed syncer-paused flags, however, so we
3416 * cannot ignore this completely. */
3417 if (peer_state.conn > C_CONNECTED &&
3418 peer_state.conn < C_SYNC_SOURCE)
3419 real_peer_disk = D_INCONSISTENT;
3420
3421 /* if peer_state changes to connected at the same time,
3422 * it explicitly notifies us that it finished resync.
3423 * Maybe we should finish it up, too? */
3424 else if (os.conn >= C_SYNC_SOURCE &&
3425 peer_state.conn == C_CONNECTED) {
3426 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3427 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003428 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003429 }
3430 }
3431
3432 /* peer says his disk is inconsistent, while we think it is uptodate,
3433 * and this happens while the peer still thinks we have a sync going on,
3434 * but we think we are already done with the sync.
3435 * We ignore this to avoid flapping pdsk.
3436 * This should not happen, if the peer is a recent version of drbd. */
3437 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3438 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3439 real_peer_disk = D_UP_TO_DATE;
3440
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003441 if (ns.conn == C_WF_REPORT_PARAMS)
3442 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443
Philipp Reisner67531712010-10-27 12:21:30 +02003444 if (peer_state.conn == C_AHEAD)
3445 ns.conn = C_BEHIND;
3446
Philipp Reisnerb411b362009-09-25 16:07:19 -07003447 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3448 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3449 int cr; /* consider resync */
3450
3451 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003452 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453 /* if we had an established connection
3454 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003455 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003457 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458 /* if we have both been inconsistent, and the peer has been
3459 * forced to be UpToDate with --overwrite-data */
3460 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3461 /* if we had been plain connected, and the admin requested to
3462 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003463 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464 (peer_state.conn >= C_STARTING_SYNC_S &&
3465 peer_state.conn <= C_WF_BITMAP_T));
3466
3467 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003468 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003469
3470 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003471 if (ns.conn == C_MASK) {
3472 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003473 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003474 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003475 } else if (peer_state.disk == D_NEGOTIATING) {
3476 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3477 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003478 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003479 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003480 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003481 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003482 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003483 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003484 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485 }
3486 }
3487 }
3488
Philipp Reisner87eeee42011-01-19 14:16:30 +01003489 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003490 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003491 goto retry;
3492 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493 ns.peer = peer_state.role;
3494 ns.pdsk = real_peer_disk;
3495 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003496 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003497 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003498 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3499 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003500 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003501 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003502 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003503 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003504 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003505 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003506 drbd_uuid_new_current(mdev);
3507 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003508 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003509 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003510 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003511 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003513 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514
3515 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003516 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003517 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518 }
3519
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003520 if (os.conn > C_WF_REPORT_PARAMS) {
3521 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522 peer_state.disk != D_NEGOTIATING ) {
3523 /* we want resync, peer has not yet decided to sync... */
3524 /* Nowadays only used when forcing a node into primary role and
3525 setting its disk to UpToDate with that */
3526 drbd_send_uuids(mdev);
3527 drbd_send_state(mdev);
3528 }
3529 }
3530
Philipp Reisner89e58e72011-01-19 13:12:45 +01003531 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532
3533 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3534
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003535 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536}
3537
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003538static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3539 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003540{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003541 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542
3543 wait_event(mdev->misc_wait,
3544 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003545 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546 mdev->state.conn < C_CONNECTED ||
3547 mdev->state.disk < D_NEGOTIATING);
3548
3549 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3550
Philipp Reisnerb411b362009-09-25 16:07:19 -07003551 /* Here the _drbd_uuid_ functions are right, current should
3552 _not_ be rotated into the history */
3553 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3554 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3555 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3556
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003557 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003558 drbd_start_resync(mdev, C_SYNC_TARGET);
3559
3560 put_ldev(mdev);
3561 } else
3562 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3563
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003564 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003565}
3566
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003567/**
3568 * receive_bitmap_plain
3569 *
3570 * Return 0 when done, 1 when another iteration is needed, and a negative error
3571 * code upon failure.
3572 */
3573static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003574receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3575 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576{
3577 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3578 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003579 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003580
Philipp Reisner02918be2010-08-20 14:35:10 +02003581 if (want != data_size) {
3582 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003583 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584 }
3585 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003586 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003587 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003588 if (err != want) {
3589 if (err >= 0)
3590 err = -EIO;
3591 return err;
3592 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593
3594 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3595
3596 c->word_offset += num_words;
3597 c->bit_offset = c->word_offset * BITS_PER_LONG;
3598 if (c->bit_offset > c->bm_bits)
3599 c->bit_offset = c->bm_bits;
3600
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003601 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003602}
3603
/**
 * recv_bm_rle_bits - decode one VLI/RLE compressed bitmap packet
 * @mdev:	device this bitmap belongs to
 * @p:		received compressed bitmap packet
 * @c:		bitmap transfer context (tracks progress across packets)
 * @len:	payload length of @p in bytes
 *
 * Decodes variable-length-integer encoded run lengths; runs alternate
 * between "clear" and "set", starting with whatever DCBP_get_start()
 * says for this packet.  Set-runs are applied via _drbd_bm_set_bits().
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* sliding 64-bit decode window */
	u64 rl;			/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of current run */
	unsigned long e;			/* last bit of current run */
	int toggle = DCBP_get_start(p);		/* nonzero: first run is "set" */
	int have;		/* valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* a run must never extend past the bitmap end */
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		/* a code word must never be longer than the bits we have left */
		if (have < bits) {
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded code word ... */
		look_ahead >>= bits;
		have -= bits;

		/* ... and refill the window from the bitstream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done exactly when the runs add up to the full bitmap size */
	return (s != c->bm_bits);
}
3668
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003669/**
3670 * decode_bitmap_c
3671 *
3672 * Return 0 when done, 1 when another iteration is needed, and a negative error
3673 * code upon failure.
3674 */
3675static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676decode_bitmap_c(struct drbd_conf *mdev,
3677 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003678 struct bm_xfer_ctx *c,
3679 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003680{
3681 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003682 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003683
3684 /* other variants had been implemented for evaluation,
3685 * but have been dropped as this one turned out to be "best"
3686 * during all our tests. */
3687
3688 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003689 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003690 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691}
3692
3693void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3694 const char *direction, struct bm_xfer_ctx *c)
3695{
3696 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003697 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3699 + c->bm_words * sizeof(long);
3700 unsigned total = c->bytes[0] + c->bytes[1];
3701 unsigned r;
3702
3703 /* total can not be zero. but just in case: */
3704 if (total == 0)
3705 return;
3706
3707 /* don't report if not compressed */
3708 if (total >= plain)
3709 return;
3710
3711 /* total < plain. check for overflow, still */
3712 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3713 : (1000 * total / plain);
3714
3715 if (r > 1000)
3716 r = 1000;
3717
3718 r = 1000 - r;
3719 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3720 "total %u; compression: %u.%u%%\n",
3721 direction,
3722 c->bytes[1], c->packets[1],
3723 c->bytes[0], c->packets[0],
3724 total, r/10, r % 10);
3725}
3726
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Receives the complete bitmap from the peer: loops over P_BITMAP /
   P_COMPRESSED_BITMAP packets until the transfer context says we are done,
   then, depending on our role (WFBitMapT vs WFBitMapS), replies with our
   own bitmap and advances the connection state, or starts resync.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;		/* one scratch page, both recv buffer and decode space */
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;	/* header of each follow-up bitmap packet */

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* cmd/data_size describe the packet that got us here; subsequent
	 * iterations re-fill them from the next received header. */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			/* copy the already-received header in front of the payload
			 * so that p looks like a complete packet */
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* stats: index 1 counts plain packets, index 0 compressed */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;	/* err == 0: bitmap transfer complete */
		}
		/* need another packet: read its header and go around again */
		if (drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* we are SyncTarget: answer with our bitmap ... */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	/* as SyncSource, kick off resync only after everything went well */
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);	/* free_page(0) is a no-op */
	return ok;
}
3829
Philipp Reisner2de876e2011-03-15 14:38:01 +01003830static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831{
3832 /* TODO zero copy sink :) */
3833 static char sink[128];
3834 int size, want, r;
3835
Philipp Reisner02918be2010-08-20 14:35:10 +02003836 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837 while (size > 0) {
3838 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003839 r = drbd_recv(tconn, sink, want);
3840 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003841 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842 size -= r;
3843 }
3844 return size == 0;
3845}
3846
Philipp Reisner2de876e2011-03-15 14:38:01 +01003847static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3848 unsigned int data_size)
3849{
3850 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3851 cmd, data_size);
3852
3853 return _tconn_receive_skip(mdev->tconn, data_size);
3854}
3855
3856static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3857{
3858 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3859 cmd, data_size);
3860
3861 return _tconn_receive_skip(tconn, data_size);
3862}
3863
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003864static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3865 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003866{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003867 /* Make sure we've acked all the TCP data associated
3868 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003869 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003870
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003871 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003872}
3873
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003874static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3875 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003876{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003877 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003878
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003879 switch (mdev->state.conn) {
3880 case C_WF_SYNC_UUID:
3881 case C_WF_BITMAP_T:
3882 case C_BEHIND:
3883 break;
3884 default:
3885 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3886 drbd_conn_str(mdev->state.conn));
3887 }
3888
Philipp Reisner73a01a12010-10-27 14:33:00 +02003889 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3890
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003891 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003892}
3893
/* One entry of the receiver dispatch table (drbd_cmd_handler below),
 * describing how to receive and handle one packet type. */
struct data_cmd {
	int expect_payload;	/* nonzero: packet may carry extra payload beyond pkt_size */
	size_t pkt_size;	/* fixed on-the-wire size incl. struct p_header */
	enum mdev_or_conn fa_type; /* first argument's type */
	union {
		/* handler taking a device (fa_type == MDEV) */
		int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
			       unsigned int to_receive);
		/* handler taking the whole connection (fa_type == CONN) */
		int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
			       unsigned int to_receive);
	};
};
3905
Philipp Reisner02918be2010-08-20 14:35:10 +02003906static struct data_cmd drbd_cmd_handler[] = {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003907 [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
3908 [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
3909 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
3910 [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
3911 [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3912 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3913 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
3914 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3915 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3916 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3917 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
Philipp Reisner72046242011-03-15 18:51:47 +01003918 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } },
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003919 [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
3920 [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
3921 [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } },
3922 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
3923 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
3924 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3925 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3926 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3927 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
3928 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003929 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } },
Philipp Reisner02918be2010-08-20 14:35:10 +02003930};
3931
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it may not rely on that,
   since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938
/* Receiver main loop: read packet headers off the data socket and
 * dispatch each packet to its handler from drbd_cmd_handler[].
 * Runs until the receiver thread is told to stop; any protocol
 * violation or handler failure forces the connection into
 * C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (drbd_recv_header(tconn, &pi))
			goto err_out;

		/* reject packet types we have no handler for */
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
		    !drbd_cmd_handler[pi.cmd].mdev_fn)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* fixed sub-header size for this packet type, beyond the
		 * common header already consumed by drbd_recv_header() */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		/* NOTE(review): pi.size - shs is unsigned arithmetic; if a
		 * malformed packet announced pi.size < shs this wraps to a
		 * huge value — presumably caught here only via the
		 * expect_payload check.  Verify pi.size is validated upstream. */
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		/* pull in the fixed sub-header, if any */
		if (shs) {
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		/* dispatch: connection-wide handlers get the tconn, device
		 * handlers get the volume looked up by pi.vnr; packets for
		 * unknown volumes are drained and skipped */
		if (drbd_cmd_handler[pi.cmd].fa_type == CONN) {
			rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
		} else {
			struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
			rv = mdev ?
				drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
				tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
		}

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* reachable only via goto: a clean thread stop falls through
	 * without touching the connection state */
	if (0) {
	err_out:
		conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	}
}
3993
Philipp Reisner0e29d162011-02-18 14:23:11 +01003994void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995{
3996 struct drbd_wq_barrier barr;
3997
3998 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01003999 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004001 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002 wait_for_completion(&barr.done);
4003}
4004
/* Tear down an established connection: stop the asender, close the
 * sockets, run per-volume cleanup, and move the connection state to
 * C_UNCONNECTED (or all the way to C_STANDALONE if the admin asked
 * for a disconnect). */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	enum drbd_conns oc;
	/* NOTE(review): rv is assigned but its value is never consumed
	 * in this function — presumably leftover from a refactor. */
	int rv = SS_UNKNOWN_ERROR;

	if (tconn->cstate == C_STANDALONE)
		return;

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume teardown (drbd_disconnected below) for every volume
	 * on this connection */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	/* read and advance the connection state under the request lock */
	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING) {
		/* wait until nobody references the network config anymore */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
	}
}
4039
/*
 * drbd_disconnected() - per-volume cleanup after the connection is gone
 * @vnr:	volume number (not used by the body; presumably an iterator
 *		callback signature — see similar int(*)(int, void *, void *)
 *		callbacks in this file)
 * @p:		the struct drbd_conf of this volume, passed as void *
 * @data:	unused
 *
 * Drains in-flight epoch entries, cancels all resync bookkeeping, stops
 * the request/resync timers, flushes the worker queue, drops the cached
 * peer UUIDs and possibly fences the peer.  Always returns 0.
 */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	/* no more request timeouts without a connection */
	del_timer(&mdev->request_timer);

	/* stop the resync timer, then run its handler once synchronously
	 * ourselves — NOTE(review): presumably so pending resync work is
	 * requeued/flushed rather than lost; confirm against resync_timer_fn */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee().  If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	/* forget the peer's UUID set; it will be re-sent on reconnect */
	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	/* if IO is not suspended, abort the transfer log now */
	if (!is_susp(mdev->state))
		tl_clear(mdev->tconn);

	drbd_md_sync(mdev);

	/* read the fencing policy while holding a local-disk reference */
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* as Primary with a fencing policy and a possibly reachable peer,
	 * try to outdate the peer asynchronously */
	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
4132
4133/*
4134 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4135 * we can agree on is stored in agreed_pro_version.
4136 *
4137 * feature flags and the reserved array should be enough room for future
4138 * enhancements of the handshake protocol, and possible plugins...
4139 *
4140 * for now, they are expected to be zero, but ignored.
4141 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004142static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004144 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004145 struct p_handshake *p = &tconn->data.sbuf.handshake;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004146 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004148 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4149 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004150 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004151 }
4152
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004153 if (tconn->data.socket == NULL) {
4154 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004155 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 }
4157
4158 memset(p, 0, sizeof(*p));
4159 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4160 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004161 err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004162 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004163 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004164 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165}
4166
4167/*
4168 * return values:
4169 * 1 yes, we have a valid connection
4170 * 0 oops, did not work out, please try again
4171 * -1 peer talks different language,
4172 * no point in trying again, please go standalone.
4173 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* payload size = handshake packet minus the 8.0 wire header */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int err, rv;

	/* send our versions first; on failure let the caller retry (0) */
	err = drbd_send_handshake(tconn);
	if (err)
		return 0;

	err = drbd_recv_header(tconn, &pi);
	if (err)
		return 0;

	/* anything but a handshake at this point is a protocol mismatch:
	 * give up for good (-1) rather than retry */
	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	rv = drbd_recv(tconn, &p->head.payload, expect);

	/* short read: transient network trouble, retry (0) */
	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	/* convert the peer's advertised range in place */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* very old peers send only protocol_min; treat max==0 as "min only" */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* ranges must overlap, otherwise we cannot talk at all */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support */
	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4233
4234#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel is built without CONFIG_CRYPTO_HMAC:
 * cram-hmac authentication cannot work, so always fail (-1 means
 * "don't retry", matching the contract documented for the real
 * implementation below).
 *
 * Fix: the original used dev_err(DEV, ...), but no mdev is in scope
 * here — only tconn — so this branch could not compile with
 * CONFIG_CRYPTO_HMAC disabled.  Use conn_err(tconn, ...) like every
 * other tconn-level function in this file.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4241#else
4242#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004243
4244/* Return value:
4245 1 - auth succeeded,
4246 0 - failed, try again (network error),
4247 -1 - auth failed, don't try again.
4248*/
4249
/* Mutual challenge/response authentication using the configured
 * cram-hmac algorithm and shared secret: send our challenge, HMAC the
 * peer's challenge, send that response, and verify the peer's response
 * against our own expected digest.  Return values documented above. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int err, rv;

	/* key the HMAC transform with the shared secret */
	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* conn_send_cmd2() returns success as non-zero; fold into rv:
	 * rv == 0 here means "network error, retry" */
	rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(tconn, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* sanity-bound the peer's challenge size before allocating */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* our response = HMAC(secret, peer's challenge) */
	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(tconn, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* the peer's response must be exactly one digest long */
	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer; our own digest
	 * has already been sent out above */
	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* compute what the peer should have answered:
	 * HMAC(secret, our challenge) */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* rv == 1: digests match, authenticated; otherwise hard failure */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditional frees are fine here */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4396#endif
4397
4398int drbdd_init(struct drbd_thread *thi)
4399{
Philipp Reisner392c8802011-02-09 10:33:31 +01004400 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401 int h;
4402
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004403 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004404
4405 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004406 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004408 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004409 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410 }
4411 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004412 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004413 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004414 }
4415 } while (h == 0);
4416
4417 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004418 if (get_net_conf(tconn)) {
4419 drbdd(tconn);
4420 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421 }
4422 }
4423
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004424 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004425
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004426 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427 return 0;
4428}
4429
4430/* ********* acknowledge sender ******** */
4431
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004432static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd)
4433{
4434 struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply;
4435 int retcode = be32_to_cpu(p->retcode);
4436
4437 if (retcode >= SS_SUCCESS) {
4438 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4439 } else {
4440 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4441 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4442 drbd_set_st_err_str(retcode), retcode);
4443 }
4444 wake_up(&tconn->ping_wait);
4445
4446 return true;
4447}
4448
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004449static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004451 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452 int retcode = be32_to_cpu(p->retcode);
4453
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004454 if (retcode >= SS_SUCCESS) {
4455 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4456 } else {
4457 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4458 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4459 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004461 wake_up(&mdev->state_wait);
4462
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004463 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464}
4465
/* Peer sent a keep-alive ping on the meta socket: answer it. */
static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	return drbd_send_ping_ack(tconn);

}
4471
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004472static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473{
4474 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004475 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4476 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4477 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004479 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480}
4481
/* Peer reports a block verified equal by checksum-based resync
 * (protocol >= 89): mark it in sync locally and update resync stats. */
static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* touch the bitmap only while holding a local-disk reference */
	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, sector);
		drbd_set_in_sync(mdev, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(mdev);
	}
	dec_rs_pending(mdev);
	/* blksize >> 9: bytes to 512-byte sectors, for resync throttling */
	atomic_add(blksize >> 9, &mdev->rs_sect_in);

	return true;
}
4504
/*
 * Look up the request identified by @id/@sector in @root and apply the
 * request-state event @what to it.  Returns false if the request is not
 * found (unless @missing_ok, the lookup helper is expected to complain —
 * NOTE(review): confirm against find_request), true otherwise.
 * complete_master_bio() runs after the lock is dropped on purpose.
 */
static int
validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&mdev->tconn->req_lock);
		return false;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* complete the master bio outside the spinlock */
	if (m.bio)
		complete_master_bio(mdev, &m);
	return true;
}
4526
/* Positive acknowledgment for a written block: map the ack packet type
 * to the matching request-state event and apply it to the request. */
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* acks for resync traffic carry ID_SYNCER instead of a request id */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	/* the D_ASSERTs document which wire protocol sends which ack type */
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = DISCARD_WRITE;
		break;
	case P_RETRY_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = POSTPONE_WRITE;
		break;
	default:
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4571
/* Negative acknowledgment: the peer failed to write a block.  Fail the
 * matching request, or for protocols A/B tolerate a missing request and
 * just mark the area out of sync. */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	/* in protocols A and B the request may legitimately be gone already */
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* resync traffic is identified by ID_SYNCER, not a request id */
	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4604
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004605static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004606{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004607 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004608 sector_t sector = be64_to_cpu(p->sector);
4609
4610 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004611
Philipp Reisnerb411b362009-09-25 16:07:19 -07004612 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4613 (unsigned long long)sector, be32_to_cpu(p->blksize));
4614
4615 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004616 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004617 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618}
4619
/* Peer could not serve (or cancelled) a resync data request: drop the
 * pending count and, for a real NegRSDReply, record the failed range. */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: both cases end with put_ldev below */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4650
/* Peer acknowledged a write barrier: release the corresponding transfer
 * log epoch, and if we were in Ahead mode with no more application I/O
 * in flight, schedule the switch back to being a sync source. */
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));

	/* test_and_set_bit must stay last in the condition: it has a side
	 * effect and guards against arming the timer twice */
	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4666
/* Result of one online-verify request: record an out-of-sync block if
 * reported, update progress, and when the last block is done queue the
 * verify-finished work (or finish inline if allocation fails). */
static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	struct drbd_work *w;
	sector_t sector;
	int size;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_oos_found(mdev, sector, size);
	else
		ov_oos_print(mdev);

	/* everything below needs a local-disk reference */
	if (!get_ldev(mdev))
		return true;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			/* hand the finish-up to the worker thread */
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work_front(&mdev->tconn->data.work, w);
		} else {
			/* no memory for the work item: finish synchronously */
			dev_err(DEV, "kmalloc(w) failed.");
			ov_oos_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return true;
}
4711
/* Intentional no-op handler: packets dispatched here (e.g. P_DELAY_PROBE
 * in asender_tbl) are consumed and ignored.  Returning true keeps the
 * asender loop running. */
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return true;
}
4716
Philipp Reisner32862ec2011-02-08 16:41:01 +01004717static int tconn_process_done_ee(struct drbd_tconn *tconn)
4718{
Philipp Reisner082a3432011-03-15 16:05:42 +01004719 struct drbd_conf *mdev;
4720 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004721
4722 do {
4723 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4724 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004725 idr_for_each_entry(&tconn->volumes, mdev, i) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +01004726 if (drbd_process_done_ee(mdev))
Philipp Reisner082a3432011-03-15 16:05:42 +01004727 return 1; /* error */
4728 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004729 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004730
4731 spin_lock_irq(&tconn->req_lock);
4732 idr_for_each_entry(&tconn->volumes, mdev, i) {
4733 not_empty = !list_empty(&mdev->done_ee);
4734 if (not_empty)
4735 break;
4736 }
4737 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004738 } while (not_empty);
4739
4740 return 0;
4741}
4742
/* Dispatch-table entry for one meta-socket (asender) packet type.
 * fa_type selects which union member is the valid handler. */
struct asender_cmd {
	size_t pkt_size;		/* expected on-wire size incl. header;
					 * 0 marks an unused table slot */
	enum mdev_or_conn fa_type; /* first argument's type */
	union {
		/* MDEV: handler takes the per-volume device */
		int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd);
		/* CONN: handler takes the whole connection */
		int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd);
	};
};
4751
/* Dispatch table for meta-socket packets, indexed by packet command
 * number (designated initializers; gaps are zero-filled and therefore
 * have pkt_size == 0).  MDEV entries use the anonymous-union default
 * member (mdev_fn); CONN entries must name .conn_fn explicitly. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } },
	[P_PING_ACK]	    = { sizeof(struct p_header), CONN, { .conn_fn = got_PingAck } },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_DISCARD_WRITE]   = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_NegAck } },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), MDEV, { got_OVResult } },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), MDEV, { got_skip } },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}},
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
};
4771
Philipp Reisnerb411b362009-09-25 16:07:19 -07004772int drbd_asender(struct drbd_thread *thi)
4773{
Philipp Reisner392c8802011-02-09 10:33:31 +01004774 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004775 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004776 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004777 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004778 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004779 void *buf = h;
4780 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004781 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004782 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004783
Philipp Reisnerb411b362009-09-25 16:07:19 -07004784 current->policy = SCHED_RR; /* Make this a realtime task! */
4785 current->rt_priority = 2; /* more important than all other tasks */
4786
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004787 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004788 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004789 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004790 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004791 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004792 goto reconnect;
4793 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004794 tconn->meta.socket->sk->sk_rcvtimeo =
4795 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004796 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797 }
4798
Philipp Reisner32862ec2011-02-08 16:41:01 +01004799 /* TODO: conditionally cork; it may hurt latency if we cork without
4800 much to send */
4801 if (!tconn->net_conf->no_cork)
4802 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004803 if (tconn_process_done_ee(tconn)) {
4804 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004805 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004806 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004807 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004808 if (!tconn->net_conf->no_cork)
4809 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004810
4811 /* short circuit, recv_msg would return EINTR anyways. */
4812 if (signal_pending(current))
4813 continue;
4814
Philipp Reisner32862ec2011-02-08 16:41:01 +01004815 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4816 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004817
4818 flush_signals(current);
4819
4820 /* Note:
4821 * -EINTR (on meta) we got a signal
4822 * -EAGAIN (on meta) rcvtimeo expired
4823 * -ECONNRESET other side closed the connection
4824 * -ERESTARTSYS (on data) we got a signal
4825 * rv < 0 other than above: unexpected error!
4826 * rv == expected: full header or command
4827 * rv < expected: "woken" by signal during receive
4828 * rv == 0 : "connection shut down by peer"
4829 */
4830 if (likely(rv > 0)) {
4831 received += rv;
4832 buf += rv;
4833 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004834 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004835 goto reconnect;
4836 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004837 /* If the data socket received something meanwhile,
4838 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004839 if (time_after(tconn->last_received,
4840 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004841 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004842 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004843 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004844 goto reconnect;
4845 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004846 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847 continue;
4848 } else if (rv == -EINTR) {
4849 continue;
4850 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004851 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852 goto reconnect;
4853 }
4854
4855 if (received == expect && cmd == NULL) {
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01004856 if (decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004857 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004858 cmd = &asender_tbl[pi.cmd];
4859 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004860 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004861 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862 goto disconnect;
4863 }
4864 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004865 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004866 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004867 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004868 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004869 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004870 }
4871 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004872 bool rv;
4873
4874 if (cmd->fa_type == CONN) {
4875 rv = cmd->conn_fn(tconn, pi.cmd);
4876 } else {
4877 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
4878 rv = cmd->mdev_fn(mdev, pi.cmd);
4879 }
4880
4881 if (!rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004882 goto reconnect;
4883
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004884 tconn->last_received = jiffies;
4885
Lars Ellenbergf36af182011-03-09 22:44:55 +01004886 /* the idle_timeout (ping-int)
4887 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004888 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004889 ping_timeout_active = 0;
4890
Philipp Reisnerb411b362009-09-25 16:07:19 -07004891 buf = h;
4892 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004893 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004894 cmd = NULL;
4895 }
4896 }
4897
4898 if (0) {
4899reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004900 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004901 }
4902 if (0) {
4903disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004904 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004905 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004906 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004907
Philipp Reisner32862ec2011-02-08 16:41:01 +01004908 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004909
4910 return 0;
4911}