blob: 1fd871bc889e3a4e4eb36a7ff270a9f0b99c7295 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/*
 * Result type of drbd_may_finish_epoch().
 * NOTE(review): the individual meanings are inferred from the names.
 */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch is still in use */
	FE_DESTROYED,	/* epoch was destroyed */
	FE_RECYCLED,	/* epoch was recycled */
};
62
/*
 * Two-way selector.
 * NOTE(review): presumably distinguishes per-device (MDEV) from
 * per-connection (CONN) handling -- confirm at the users.
 */
enum mdev_or_conn {
	MDEV,
	CONN,
};
67
Philipp Reisner65d11ed2011-02-07 17:35:59 +010068static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010069static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010070static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
72static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010073static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisnerb411b362009-09-25 16:07:19 -070075
/* Allocation flags for opportunistic page allocation: highmem pages are
 * acceptable and allocation failures are not logged. */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
77
Lars Ellenberg45bb9122010-05-14 17:10:48 +020078/*
79 * some helper functions to deal with single linked page lists,
80 * page->private being our "next" pointer.
81 */
82
83/* If at least n pages are linked at head, get n pages off.
84 * Otherwise, don't modify head, and return NULL.
85 * Locking is the responsibility of the caller.
86 */
87static struct page *page_chain_del(struct page **head, int n)
88{
89 struct page *page;
90 struct page *tmp;
91
92 BUG_ON(!n);
93 BUG_ON(!head);
94
95 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020096
97 if (!page)
98 return NULL;
99
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200100 while (page) {
101 tmp = page_chain_next(page);
102 if (--n == 0)
103 break; /* found sufficient pages */
104 if (tmp == NULL)
105 /* insufficient pages, don't use any of them. */
106 return NULL;
107 page = tmp;
108 }
109
110 /* add end of list marker for the returned list */
111 set_page_private(page, 0);
112 /* actual return value, and adjustment of head */
113 page = *head;
114 *head = tmp;
115 return page;
116}
117
118/* may be used outside of locks to find the tail of a (usually short)
119 * "private" page chain, before adding it back to a global chain head
120 * with page_chain_add() under a spinlock. */
121static struct page *page_chain_tail(struct page *page, int *len)
122{
123 struct page *tmp;
124 int i = 1;
125 while ((tmp = page_chain_next(page)))
126 ++i, page = tmp;
127 if (len)
128 *len = i;
129 return page;
130}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
143static void page_chain_add(struct page **head,
144 struct page *chain_first, struct page *chain_last)
145{
146#if 1
147 struct page *tmp;
148 tmp = page_chain_tail(chain_first, NULL);
149 BUG_ON(tmp != chain_last);
150#endif
151
152 /* add chain to head */
153 set_page_private(chain_last, (unsigned long)*head);
154 *head = chain_first;
155}
156
157static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700158{
159 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 struct page *tmp = NULL;
161 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162
163 /* Yes, testing drbd_pp_vacant outside the lock is racy.
164 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700166 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200167 page = page_chain_del(&drbd_pp_pool, number);
168 if (page)
169 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700170 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200171 if (page)
172 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700173 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200174
Philipp Reisnerb411b362009-09-25 16:07:19 -0700175 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
176 * "criss-cross" setup, that might cause write-out on some other DRBD,
177 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200178 for (i = 0; i < number; i++) {
179 tmp = alloc_page(GFP_TRY);
180 if (!tmp)
181 break;
182 set_page_private(tmp, (unsigned long)page);
183 page = tmp;
184 }
185
186 if (i == number)
187 return page;
188
189 /* Not enough pages immediately available this time.
190 * No need to jump around here, drbd_pp_alloc will retry this
191 * function "soon". */
192 if (page) {
193 tmp = page_chain_tail(page, NULL);
194 spin_lock(&drbd_pp_lock);
195 page_chain_add(&drbd_pp_pool, page, tmp);
196 drbd_pp_vacant += i;
197 spin_unlock(&drbd_pp_lock);
198 }
199 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200}
201
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
203{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100204 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 struct list_head *le, *tle;
206
207 /* The EEs are always appended to the end of the list. Since
208 they are sent in order over the wire, they have to finish
209 in order. As soon as we see the first not finished we can
210 stop to examine the list... */
211
212 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100213 peer_req = list_entry(le, struct drbd_peer_request, w.list);
214 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 break;
216 list_move(le, to_be_freed);
217 }
218}
219
220static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
221{
222 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224
Philipp Reisner87eeee42011-01-19 14:16:30 +0100225 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100227 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100229 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
230 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700231}
232
233/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700235 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200236 * @number: number of pages requested
237 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Tries to allocate number pages, first from our own page pool, then from
240 * the kernel, unless this allocation would exceed the max_buffers setting.
241 * Possibly retry until DRBD frees sufficient pages somewhere else.
242 *
243 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700244 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700246{
247 struct page *page = NULL;
248 DEFINE_WAIT(wait);
249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 /* Yes, we may run up to @number over max_buffers. If we
251 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100252 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200253 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700254
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700256 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
257
258 drbd_kick_lo_and_reclaim_net(mdev);
259
Philipp Reisner89e58e72011-01-19 13:12:45 +0100260 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200261 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700262 if (page)
263 break;
264 }
265
266 if (!retry)
267 break;
268
269 if (signal_pending(current)) {
270 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
271 break;
272 }
273
274 schedule();
275 }
276 finish_wait(&drbd_pp_wait, &wait);
277
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200278 if (page)
279 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280 return page;
281}
282
283/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100284 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200285 * Either links the page chain back to the global pool,
286 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200287static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700288{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200289 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700290 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200291
Philipp Reisner81a5d602011-02-22 19:53:16 -0500292 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200293 i = page_chain_free(page);
294 else {
295 struct page *tmp;
296 tmp = page_chain_tail(page, &i);
297 spin_lock(&drbd_pp_lock);
298 page_chain_add(&drbd_pp_pool, page, tmp);
299 drbd_pp_vacant += i;
300 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200302 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200303 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200304 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
305 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700306 wake_up(&drbd_pp_wait);
307}
308
309/*
310You need to hold the req_lock:
311 _drbd_wait_ee_list_empty()
312
313You must not have the req_lock:
314 drbd_free_ee()
315 drbd_alloc_ee()
316 drbd_init_ee()
317 drbd_release_ee()
318 drbd_ee_fix_bhs()
319 drbd_process_done_ee()
320 drbd_clear_done_ee()
321 drbd_wait_ee_list_empty()
322*/
323
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100324struct drbd_peer_request *
325drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
326 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100328 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700329 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200330 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700331
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100332 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700333 return NULL;
334
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100335 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
336 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700337 if (!(gfp_mask & __GFP_NOWARN))
338 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
339 return NULL;
340 }
341
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200342 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
343 if (!page)
344 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700345
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 drbd_clear_interval(&peer_req->i);
347 peer_req->i.size = data_size;
348 peer_req->i.sector = sector;
349 peer_req->i.local = false;
350 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100351
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100352 peer_req->epoch = NULL;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100353 peer_req->w.mdev = mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100354 peer_req->pages = page;
355 atomic_set(&peer_req->pending_bios, 0);
356 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100357 /*
358 * The block_id is opaque to the receiver. It is not endianness
359 * converted, and sent back to the sender unchanged.
360 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100361 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100363 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200365 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100366 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700367 return NULL;
368}
369
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100370void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100371 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 if (peer_req->flags & EE_HAS_DIGEST)
374 kfree(peer_req->digest);
375 drbd_pp_free(mdev, peer_req->pages, is_net);
376 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
377 D_ASSERT(drbd_interval_empty(&peer_req->i));
378 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379}
380
381int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
382{
383 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100384 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200386 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387
Philipp Reisner87eeee42011-01-19 14:16:30 +0100388 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100390 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
393 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700394 count++;
395 }
396 return count;
397}
398
399
Philipp Reisner32862ec2011-02-08 16:41:01 +0100400/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401 * and receive_Barrier.
402 *
403 * Move entries from net_ee to done_ee, if ready.
404 * Grab done_ee, call all callbacks, free the entries.
405 * The callbacks typically send out ACKs.
406 */
407static int drbd_process_done_ee(struct drbd_conf *mdev)
408{
409 LIST_HEAD(work_list);
410 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100411 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100412 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413
Philipp Reisner87eeee42011-01-19 14:16:30 +0100414 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700415 reclaim_net_ee(mdev, &reclaimed);
416 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100417 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100419 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
420 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700421
422 /* possible callbacks here:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100423 * e_end_block, and e_end_resync_block, e_send_discard_write.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700424 * all ignore the last argument.
425 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100426 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100427 int err2;
428
Philipp Reisnerb411b362009-09-25 16:07:19 -0700429 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100430 err2 = peer_req->w.cb(&peer_req->w, !!err);
431 if (!err)
432 err = err2;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100433 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700434 }
435 wake_up(&mdev->ee_wait);
436
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100437 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700438}
439
440void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
441{
442 DEFINE_WAIT(wait);
443
444 /* avoids spin_lock/unlock
445 * and calling prepare_to_wait in the fast path */
446 while (!list_empty(head)) {
447 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100448 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100449 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100451 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452 }
453}
454
455void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
456{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100457 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700458 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100459 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700460}
461
462/* see also kernel_accept; which is only present since 2.6.18.
463 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100464static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700465{
466 struct sock *sk = sock->sk;
467 int err = 0;
468
469 *what = "listen";
470 err = sock->ops->listen(sock, 5);
471 if (err < 0)
472 goto out;
473
474 *what = "sock_create_lite";
475 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
476 newsock);
477 if (err < 0)
478 goto out;
479
480 *what = "accept";
481 err = sock->ops->accept(sock, *newsock, 0);
482 if (err < 0) {
483 sock_release(*newsock);
484 *newsock = NULL;
485 goto out;
486 }
487 (*newsock)->ops = sock->ops;
488
489out:
490 return err;
491}
492
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100493static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700494{
495 mm_segment_t oldfs;
496 struct kvec iov = {
497 .iov_base = buf,
498 .iov_len = size,
499 };
500 struct msghdr msg = {
501 .msg_iovlen = 1,
502 .msg_iov = (struct iovec *)&iov,
503 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
504 };
505 int rv;
506
507 oldfs = get_fs();
508 set_fs(KERNEL_DS);
509 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
510 set_fs(oldfs);
511
512 return rv;
513}
514
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100515static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700516{
517 mm_segment_t oldfs;
518 struct kvec iov = {
519 .iov_base = buf,
520 .iov_len = size,
521 };
522 struct msghdr msg = {
523 .msg_iovlen = 1,
524 .msg_iov = (struct iovec *)&iov,
525 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
526 };
527 int rv;
528
529 oldfs = get_fs();
530 set_fs(KERNEL_DS);
531
532 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100533 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700534 if (rv == size)
535 break;
536
537 /* Note:
538 * ECONNRESET other side closed the connection
539 * ERESTARTSYS (on sock) we got a signal
540 */
541
542 if (rv < 0) {
543 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100544 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700545 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100546 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700547 break;
548 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100549 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700550 break;
551 } else {
552 /* signal came in, or peer/link went down,
553 * after we read a partial message
554 */
555 /* D_ASSERT(signal_pending(current)); */
556 break;
557 }
558 };
559
560 set_fs(oldfs);
561
562 if (rv != size)
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100563 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700564
565 return rv;
566}
567
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100568static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
569{
570 int err;
571
572 err = drbd_recv(tconn, buf, size);
573 if (err != size) {
574 if (err >= 0)
575 err = -EIO;
576 } else
577 err = 0;
578 return err;
579}
580
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100581static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
582{
583 int err;
584
585 err = drbd_recv_all(tconn, buf, size);
586 if (err && !signal_pending(current))
587 conn_warn(tconn, "short read (expected size %d)\n", (int)size);
588 return err;
589}
590
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200591/* quoting tcp(7):
592 * On individual connections, the socket buffer size must be set prior to the
593 * listen(2) or connect(2) calls in order to have it take effect.
594 * This is our wrapper to do so.
595 */
596static void drbd_setbufsize(struct socket *sock, unsigned int snd,
597 unsigned int rcv)
598{
599 /* open coded SO_SNDBUF, SO_RCVBUF */
600 if (snd) {
601 sock->sk->sk_sndbuf = snd;
602 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
603 }
604 if (rcv) {
605 sock->sk->sk_rcvbuf = rcv;
606 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
607 }
608}
609
/* Try to actively establish a TCP connection to the configured peer.
 *
 * Creates a socket, binds it to the configured local address (with port 0)
 * and connects to the peer address.  Holds a net_conf reference for the
 * duration of the call.
 *
 * Returns the connected socket, or NULL on failure.  Failures before the
 * connect step request C_DISCONNECTING, unless the error is one of the
 * "try again later" codes listed below; a failing connect never does.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	struct sockaddr_in6 bind_addr;
	struct socket *sock;
	const char *step;
	int fatal = 1;
	int err;

	if (!get_net_conf(tconn))
		return NULL;

	step = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	sock->sk->sk_rcvtimeo = sock->sk->sk_sndtimeo;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	memcpy(&bind_addr, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(bind_addr)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		bind_addr.sin6_port = 0;
	else
		((struct sockaddr_in *)&bind_addr)->sin_port = 0; /* AF_INET & AF_SCI */

	step = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &bind_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	fatal = 0;
	step = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT:
		case EAGAIN:
		case EINPROGRESS:
		case EINTR:
		case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED:
		case ENETUNREACH:
		case EHOSTDOWN:
		case EHOSTUNREACH:
			fatal = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", step, err);
			break;
		}
		if (fatal)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
687
Philipp Reisner76536202011-02-07 14:09:54 +0100688static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689{
690 int timeo, err;
691 struct socket *s_estab = NULL, *s_listen;
692 const char *what;
693
Philipp Reisner76536202011-02-07 14:09:54 +0100694 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700695 return NULL;
696
697 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100698 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700699 SOCK_STREAM, IPPROTO_TCP, &s_listen);
700 if (err) {
701 s_listen = NULL;
702 goto out;
703 }
704
Philipp Reisner76536202011-02-07 14:09:54 +0100705 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
707
708 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
709 s_listen->sk->sk_rcvtimeo = timeo;
710 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100711 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
712 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700713
714 what = "bind before listen";
715 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100716 (struct sockaddr *) tconn->net_conf->my_addr,
717 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700718 if (err < 0)
719 goto out;
720
Philipp Reisner76536202011-02-07 14:09:54 +0100721 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700722
723out:
724 if (s_listen)
725 sock_release(s_listen);
726 if (err < 0) {
727 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100728 conn_err(tconn, "%s failed, err = %d\n", what, err);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100729 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730 }
731 }
Philipp Reisner76536202011-02-07 14:09:54 +0100732 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700733
734 return s_estab;
735}
736
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100737static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100739 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700740
Andreas Gruenbacherecf23632011-03-15 23:48:25 +0100741 return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742}
743
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100744static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100746 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700747 int rr;
748
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100749 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700750
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100751 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700752 return be16_to_cpu(h->command);
753
754 return 0xffff;
755}
756
757/**
758 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700759 * @sock: pointer to the pointer to the socket.
760 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100761static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700762{
763 int rr;
764 char tb[4];
765
766 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100767 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700768
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100769 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700770
771 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100772 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773 } else {
774 sock_release(*sock);
775 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100776 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700777 }
778}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100779/* Gets called if a connection is established, or if a new minor gets created
780 in a connection */
781int drbd_connected(int vnr, void *p, void *data)
Philipp Reisner907599e2011-02-08 11:25:37 +0100782{
783 struct drbd_conf *mdev = (struct drbd_conf *)p;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100784 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100785
786 atomic_set(&mdev->packet_seq, 0);
787 mdev->peer_seq = 0;
788
Philipp Reisner8410da82011-02-11 20:11:10 +0100789 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
790 &mdev->tconn->cstate_mutex :
791 &mdev->own_state_mutex;
792
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100793 err = drbd_send_sync_param(mdev);
794 if (!err)
795 err = drbd_send_sizes(mdev, 0, 0);
796 if (!err)
797 err = drbd_send_uuids(mdev);
798 if (!err)
799 err = drbd_send_state(mdev);
Philipp Reisner907599e2011-02-08 11:25:37 +0100800 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
801 clear_bit(RESIZE_PENDING, &mdev->flags);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100802 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100803}
804
Philipp Reisnerb411b362009-09-25 16:07:19 -0700805/*
806 * return values:
807 * 1 yes, we have a valid connection
808 * 0 oops, did not work out, please try again
809 * -1 peer talks different language,
810 * no point in trying again, please go standalone.
811 * -2 We do not have a network config...
812 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100813static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814{
815 struct socket *s, *sock, *msock;
816 int try, h, ok;
817
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100818 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700819 return -2;
820
Philipp Reisner907599e2011-02-08 11:25:37 +0100821 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100822
823 /* Assume that the peer only understands protocol 80 until we know better. */
824 tconn->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825
826 sock = NULL;
827 msock = NULL;
828
829 do {
830 for (try = 0;;) {
831 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100832 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700833 if (s || ++try >= 3)
834 break;
835 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100836 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700837 }
838
839 if (s) {
840 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100841 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700842 sock = s;
843 s = NULL;
844 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100845 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700846 msock = s;
847 s = NULL;
848 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100849 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700850 goto out_release_sockets;
851 }
852 }
853
854 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100855 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100856 ok = drbd_socket_okay(&sock);
857 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700858 if (ok)
859 break;
860 }
861
862retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100863 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700864 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100865 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100866 drbd_socket_okay(&sock);
867 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700868 switch (try) {
869 case P_HAND_SHAKE_S:
870 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100871 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700872 sock_release(sock);
873 }
874 sock = s;
875 break;
876 case P_HAND_SHAKE_M:
877 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100878 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700879 sock_release(msock);
880 }
881 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100882 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700883 break;
884 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100885 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700886 sock_release(s);
887 if (random32() & 1)
888 goto retry;
889 }
890 }
891
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100892 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893 goto out_release_sockets;
894 if (signal_pending(current)) {
895 flush_signals(current);
896 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100897 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700898 goto out_release_sockets;
899 }
900
901 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100902 ok = drbd_socket_okay(&sock);
903 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700904 if (ok)
905 break;
906 }
907 } while (1);
908
909 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
910 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
911
912 sock->sk->sk_allocation = GFP_NOIO;
913 msock->sk->sk_allocation = GFP_NOIO;
914
915 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
916 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
917
Philipp Reisnerb411b362009-09-25 16:07:19 -0700918 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100919 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
921 * first set it to the P_HAND_SHAKE timeout,
922 * which we set to 4x the configured ping_timeout. */
923 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100924 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700925
Philipp Reisner907599e2011-02-08 11:25:37 +0100926 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
927 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700928
929 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300930 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700931 drbd_tcp_nodelay(sock);
932 drbd_tcp_nodelay(msock);
933
Philipp Reisner907599e2011-02-08 11:25:37 +0100934 tconn->data.socket = sock;
935 tconn->meta.socket = msock;
936 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700937
Philipp Reisner907599e2011-02-08 11:25:37 +0100938 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700939 if (h <= 0)
940 return h;
941
Philipp Reisner907599e2011-02-08 11:25:37 +0100942 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100944 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100945 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100946 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700947 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100948 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100949 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100950 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700951 }
952 }
953
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100954 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955 return 0;
956
Philipp Reisner907599e2011-02-08 11:25:37 +0100957 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700958 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
959
Philipp Reisner907599e2011-02-08 11:25:37 +0100960 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700961
Andreas Gruenbacher387eb302011-03-16 01:05:37 +0100962 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200963 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700964
Philipp Reisner907599e2011-02-08 11:25:37 +0100965 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700966
967out_release_sockets:
968 if (sock)
969 sock_release(sock);
970 if (msock)
971 sock_release(msock);
972 return -1;
973}
974
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100975static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100977 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100978 pi->cmd = be16_to_cpu(h->h80.command);
979 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100980 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100981 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100982 pi->cmd = be16_to_cpu(h->h95.command);
983 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
984 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200985 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100986 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200987 be32_to_cpu(h->h80.magic),
988 be16_to_cpu(h->h80.command),
989 be16_to_cpu(h->h80.length));
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100990 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700991 }
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100992 return 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100993}
994
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100995static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100996{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100997 struct p_header *h = &tconn->data.rbuf.header;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100998 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100999
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001000 err = drbd_recv_all_warn(tconn, h, sizeof(*h));
1001 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001002 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001003
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001004 err = decode_header(tconn, h, pi);
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01001005 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001006
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001007 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008}
1009
Philipp Reisner2451fc32010-08-24 13:43:11 +02001010static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001011{
1012 int rv;
1013
1014 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +04001015 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +02001016 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001017 if (rv) {
1018 dev_err(DEV, "local disk flush failed with status %d\n", rv);
1019 /* would rather check on EOPNOTSUPP, but that is not reliable.
1020 * don't try again for ANY return value != 0
1021 * if (rv == -EOPNOTSUPP) */
1022 drbd_bump_write_ordering(mdev, WO_drain_io);
1023 }
1024 put_ldev(mdev);
1025 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026}
1027
1028/**
1029 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1030 * @mdev: DRBD device.
1031 * @epoch: Epoch object.
1032 * @ev: Epoch event.
1033 */
1034static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1035 struct drbd_epoch *epoch,
1036 enum epoch_event ev)
1037{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001038 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001039 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040 enum finish_epoch rv = FE_STILL_LIVE;
1041
1042 spin_lock(&mdev->epoch_lock);
1043 do {
1044 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001045
1046 epoch_size = atomic_read(&epoch->epoch_size);
1047
1048 switch (ev & ~EV_CLEANUP) {
1049 case EV_PUT:
1050 atomic_dec(&epoch->active);
1051 break;
1052 case EV_GOT_BARRIER_NR:
1053 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001054 break;
1055 case EV_BECAME_LAST:
1056 /* nothing to do*/
1057 break;
1058 }
1059
Philipp Reisnerb411b362009-09-25 16:07:19 -07001060 if (epoch_size != 0 &&
1061 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001062 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001063 if (!(ev & EV_CLEANUP)) {
1064 spin_unlock(&mdev->epoch_lock);
1065 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1066 spin_lock(&mdev->epoch_lock);
1067 }
1068 dec_unacked(mdev);
1069
1070 if (mdev->current_epoch != epoch) {
1071 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1072 list_del(&epoch->list);
1073 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1074 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001075 kfree(epoch);
1076
1077 if (rv == FE_STILL_LIVE)
1078 rv = FE_DESTROYED;
1079 } else {
1080 epoch->flags = 0;
1081 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001082 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001083 if (rv == FE_STILL_LIVE)
1084 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001085 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086 }
1087 }
1088
1089 if (!next_epoch)
1090 break;
1091
1092 epoch = next_epoch;
1093 } while (1);
1094
1095 spin_unlock(&mdev->epoch_lock);
1096
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097 return rv;
1098}
1099
1100/**
1101 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1102 * @mdev: DRBD device.
1103 * @wo: Write ordering method to try.
1104 */
1105void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1106{
1107 enum write_ordering_e pwo;
1108 static char *write_ordering_str[] = {
1109 [WO_none] = "none",
1110 [WO_drain_io] = "drain",
1111 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001112 };
1113
1114 pwo = mdev->write_ordering;
1115 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1117 wo = WO_drain_io;
1118 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1119 wo = WO_none;
1120 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001121 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001122 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1123}
1124
1125/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001126 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001127 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001128 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001129 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001130 *
1131 * May spread the pages to multiple bios,
1132 * depending on bio_add_page restrictions.
1133 *
1134 * Returns 0 if all bios have been submitted,
1135 * -ENOMEM if we could not allocate enough bios,
1136 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1137 * single page to an empty bio (which should never happen and likely indicates
1138 * that the lower level IO stack is in some way broken). This has been observed
1139 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001140 */
1141/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001142int drbd_submit_peer_request(struct drbd_conf *mdev,
1143 struct drbd_peer_request *peer_req,
1144 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001145{
1146 struct bio *bios = NULL;
1147 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001148 struct page *page = peer_req->pages;
1149 sector_t sector = peer_req->i.sector;
1150 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001151 unsigned n_bios = 0;
1152 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001153 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001154
1155 /* In most cases, we will only need one bio. But in case the lower
1156 * level restrictions happen to be different at this offset on this
1157 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001158 * request in more than one bio.
1159 *
1160 * Plain bio_alloc is good enough here, this is no DRBD internally
1161 * generated bio, but a bio allocated on behalf of the peer.
1162 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001163next_bio:
1164 bio = bio_alloc(GFP_NOIO, nr_pages);
1165 if (!bio) {
1166 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1167 goto fail;
1168 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001169 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001170 bio->bi_sector = sector;
1171 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001172 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001173 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001174 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001175
1176 bio->bi_next = bios;
1177 bios = bio;
1178 ++n_bios;
1179
1180 page_chain_for_each(page) {
1181 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1182 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001183 /* A single page must always be possible!
1184 * But in case it fails anyways,
1185 * we deal with it, and complain (below). */
1186 if (bio->bi_vcnt == 0) {
1187 dev_err(DEV,
1188 "bio_add_page failed for len=%u, "
1189 "bi_vcnt=0 (bi_sector=%llu)\n",
1190 len, (unsigned long long)bio->bi_sector);
1191 err = -ENOSPC;
1192 goto fail;
1193 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001194 goto next_bio;
1195 }
1196 ds -= len;
1197 sector += len >> 9;
1198 --nr_pages;
1199 }
1200 D_ASSERT(page == NULL);
1201 D_ASSERT(ds == 0);
1202
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001203 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001204 do {
1205 bio = bios;
1206 bios = bios->bi_next;
1207 bio->bi_next = NULL;
1208
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001209 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001210 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001211 return 0;
1212
1213fail:
1214 while (bios) {
1215 bio = bios;
1216 bios = bios->bi_next;
1217 bio_put(bio);
1218 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001219 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001220}
1221
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001222static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001223 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001224{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001225 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001226
1227 drbd_remove_interval(&mdev->write_requests, i);
1228 drbd_clear_interval(i);
1229
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001230 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001231 if (i->waiting)
1232 wake_up(&mdev->misc_wait);
1233}
1234
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001235static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1236 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001237{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001238 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001239 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240 struct drbd_epoch *epoch;
1241
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 inc_unacked(mdev);
1243
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 mdev->current_epoch->barrier_nr = p->barrier;
1245 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1246
1247 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1248 * the activity log, which means it would not be resynced in case the
1249 * R_PRIMARY crashes now.
1250 * Therefore we must send the barrier_ack after the barrier request was
1251 * completed. */
1252 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253 case WO_none:
1254 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001255 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001256
1257 /* receiver context, in the writeout path of the other node.
1258 * avoid potential distributed deadlock */
1259 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1260 if (epoch)
1261 break;
1262 else
1263 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1264 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265
1266 case WO_bdev_flush:
1267 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001269 drbd_flush(mdev);
1270
1271 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1272 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1273 if (epoch)
1274 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275 }
1276
Philipp Reisner2451fc32010-08-24 13:43:11 +02001277 epoch = mdev->current_epoch;
1278 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1279
1280 D_ASSERT(atomic_read(&epoch->active) == 0);
1281 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001283 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001284 default:
1285 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001286 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287 }
1288
1289 epoch->flags = 0;
1290 atomic_set(&epoch->epoch_size, 0);
1291 atomic_set(&epoch->active, 0);
1292
1293 spin_lock(&mdev->epoch_lock);
1294 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1295 list_add(&epoch->list, &mdev->current_epoch->list);
1296 mdev->current_epoch = epoch;
1297 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298 } else {
1299 /* The current_epoch got recycled while we allocated this one... */
1300 kfree(epoch);
1301 }
1302 spin_unlock(&mdev->epoch_lock);
1303
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001304 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001305}
1306
1307/* used from receive_RSDataReply (recv_resync_read)
1308 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001309static struct drbd_peer_request *
1310read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1311 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001312{
Lars Ellenberg66660322010-04-06 12:15:04 +02001313 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001314 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001315 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001316 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001317 void *dig_in = mdev->tconn->int_dig_in;
1318 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001319 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001320
Philipp Reisnera0638452011-01-19 14:31:32 +01001321 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1322 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323
1324 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001325 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1326 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001327 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001328 }
1329
1330 data_size -= dgs;
1331
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001332 if (!expect(data_size != 0))
1333 return NULL;
1334 if (!expect(IS_ALIGNED(data_size, 512)))
1335 return NULL;
1336 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1337 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001338
Lars Ellenberg66660322010-04-06 12:15:04 +02001339 /* even though we trust out peer,
1340 * we sometimes have to double check. */
1341 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001342 dev_err(DEV, "request from peer beyond end of local disk: "
1343 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001344 (unsigned long long)capacity,
1345 (unsigned long long)sector, data_size);
1346 return NULL;
1347 }
1348
Philipp Reisnerb411b362009-09-25 16:07:19 -07001349 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1350 * "criss-cross" setup, that might cause write-out on some other DRBD,
1351 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001352 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1353 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001355
Philipp Reisnerb411b362009-09-25 16:07:19 -07001356 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001357 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001358 page_chain_for_each(page) {
1359 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001360 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001361 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001362 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001363 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1364 data[0] = data[0] ^ (unsigned long)-1;
1365 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001366 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001367 if (err) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001368 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369 return NULL;
1370 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001371 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001372 }
1373
1374 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001375 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001377 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1378 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001379 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380 return NULL;
1381 }
1382 }
1383 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001384 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385}
1386
1387/* drbd_drain_block() just takes a data block
1388 * out of the socket input buffer, and discards it.
1389 */
1390static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1391{
1392 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001393 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001394 void *data;
1395
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001396 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001397 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001398
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001399 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001400
1401 data = kmap(page);
1402 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001403 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1404
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001405 err = drbd_recv_all_warn(mdev->tconn, data, len);
1406 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001408 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001409 }
1410 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001411 drbd_pp_free(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001412 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001413}
1414
1415static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1416 sector_t sector, int data_size)
1417{
1418 struct bio_vec *bvec;
1419 struct bio *bio;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001420 int dgs, err, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001421 void *dig_in = mdev->tconn->int_dig_in;
1422 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423
Philipp Reisnera0638452011-01-19 14:31:32 +01001424 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1425 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426
1427 if (dgs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001428 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1429 if (err)
1430 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431 }
1432
1433 data_size -= dgs;
1434
1435 /* optimistically update recv_cnt. if receiving fails below,
1436 * we disconnect anyways, and counters will be reset. */
1437 mdev->recv_cnt += data_size>>9;
1438
1439 bio = req->master_bio;
1440 D_ASSERT(sector == bio->bi_sector);
1441
1442 bio_for_each_segment(bvec, bio, i) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001443 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444 expect = min_t(int, data_size, bvec->bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001445 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001446 kunmap(bvec->bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001447 if (err)
1448 return err;
1449 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001450 }
1451
1452 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001453 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454 if (memcmp(dig_in, dig_vv, dgs)) {
1455 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001456 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457 }
1458 }
1459
1460 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001461 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462}
1463
1464/* e_end_resync_block() is called via
1465 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001466static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001468 struct drbd_peer_request *peer_req =
1469 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001470 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001471 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001472 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001474 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1477 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001478 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479 } else {
1480 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001481 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001483 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484 }
1485 dec_unacked(mdev);
1486
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001487 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488}
1489
1490static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1491{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001492 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001493
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001494 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1495 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001496 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497
1498 dec_rs_pending(mdev);
1499
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500 inc_unacked(mdev);
1501 /* corresponding dec_unacked() in e_end_resync_block()
1502 * respective _drbd_clear_done_ee */
1503
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001504 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001505
Philipp Reisner87eeee42011-01-19 14:16:30 +01001506 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001507 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001508 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001509
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001510 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001511 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001512 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001514 /* don't care for the reason here */
1515 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001516 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001517 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001518 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001519
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001520 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001521fail:
1522 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001523 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001524}
1525
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001526static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001527find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1528 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001529{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001530 struct drbd_request *req;
1531
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001532 /* Request object according to our peer */
1533 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001534 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001535 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001536 if (!missing_ok) {
1537 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1538 (unsigned long)id, (unsigned long long)sector);
1539 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001540 return NULL;
1541}
1542
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001543static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1544 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545{
1546 struct drbd_request *req;
1547 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001548 int err;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001549 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001550
1551 sector = be64_to_cpu(p->sector);
1552
Philipp Reisner87eeee42011-01-19 14:16:30 +01001553 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001554 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001555 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001556 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001557 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558
Bart Van Assche24c48302011-05-21 18:32:29 +02001559 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560 * special casing it there for the various failure cases.
1561 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001562 err = recv_dless_read(mdev, req, sector, data_size);
1563 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001564 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 /* else: nothing. handled from drbd_disconnect...
1566 * I don't think we may complete this just yet
1567 * in case we are "on-disconnect: freeze" */
1568
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001569 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570}
1571
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001572static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1573 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001574{
1575 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001576 int err;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001577 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578
1579 sector = be64_to_cpu(p->sector);
1580 D_ASSERT(p->block_id == ID_SYNCER);
1581
1582 if (get_ldev(mdev)) {
1583 /* data is submitted to disk within recv_resync_read.
1584 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001585 * or in drbd_peer_request_endio. */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001586 err = recv_resync_read(mdev, sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587 } else {
1588 if (__ratelimit(&drbd_ratelimit_state))
1589 dev_err(DEV, "Can not write resync data to local disk.\n");
1590
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001591 err = drbd_drain_block(mdev, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001592
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001593 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594 }
1595
Philipp Reisner778f2712010-07-06 11:14:00 +02001596 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1597
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001598 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001599}
1600
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001601static int w_restart_write(struct drbd_work *w, int cancel)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001602{
1603 struct drbd_request *req = container_of(w, struct drbd_request, w);
1604 struct drbd_conf *mdev = w->mdev;
1605 struct bio *bio;
1606 unsigned long start_time;
1607 unsigned long flags;
1608
1609 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1610 if (!expect(req->rq_state & RQ_POSTPONED)) {
1611 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001612 return -EIO;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001613 }
1614 bio = req->master_bio;
1615 start_time = req->start_time;
1616 /* Postponed requests will not have their master_bio completed! */
1617 __req_mod(req, DISCARD_WRITE, NULL);
1618 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1619
1620 while (__drbd_make_request(mdev, bio, start_time))
1621 /* retry */ ;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001622 return 0;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001623}
1624
1625static void restart_conflicting_writes(struct drbd_conf *mdev,
1626 sector_t sector, int size)
1627{
1628 struct drbd_interval *i;
1629 struct drbd_request *req;
1630
1631 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1632 if (!i->local)
1633 continue;
1634 req = container_of(i, struct drbd_request, i);
1635 if (req->rq_state & RQ_LOCAL_PENDING ||
1636 !(req->rq_state & RQ_POSTPONED))
1637 continue;
1638 if (expect(list_empty(&req->w.list))) {
1639 req->w.mdev = mdev;
1640 req->w.cb = w_restart_write;
1641 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1642 }
1643 }
1644}
1645
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646/* e_end_block() is called via drbd_process_done_ee().
1647 * this means this function only runs in the asender thread
1648 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001649static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001651 struct drbd_peer_request *peer_req =
1652 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001653 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001654 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001655 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656
Philipp Reisner89e58e72011-01-19 13:12:45 +01001657 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001658 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1660 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001661 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001663 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001665 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001667 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668 /* we expect it to be marked out of sync anyways...
1669 * maybe assert this? */
1670 }
1671 dec_unacked(mdev);
1672 }
1673 /* we delete from the conflict detection hash _after_ we sent out the
1674 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001675 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001676 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001677 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1678 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001679 if (peer_req->flags & EE_RESTART_REQUESTS)
1680 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001681 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001682 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001683 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001685 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001687 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001688}
1689
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001690static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001692 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001693 struct drbd_peer_request *peer_req =
1694 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001695 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001696
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001697 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698 dec_unacked(mdev);
1699
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001700 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701}
1702
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001703static int e_send_discard_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001704{
1705 return e_send_ack(w, P_DISCARD_WRITE);
1706}
1707
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001708static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001709{
1710 struct drbd_tconn *tconn = w->mdev->tconn;
1711
1712 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1713 P_RETRY_WRITE : P_DISCARD_WRITE);
1714}
1715
/*
 * Compare two 32-bit sequence numbers with wrap-around.
 *
 * Returns true if @a is "after" @b, i.e. if the distance from @b to @a,
 * taken modulo 2^32, lies in 1 .. 2^31 - 1.
 */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 *  a <<= 8; b <<= 8;
	 *
	 * Subtract as unsigned, where wrap-around is well defined, and only
	 * then reinterpret the difference as signed.  Subtracting two values
	 * already cast to s32 can overflow, which is undefined in ISO C.
	 */
	return (s32)(a - b) > 0;
}
1725
1726static u32 seq_max(u32 a, u32 b)
1727{
1728 return seq_greater(a, b) ? a : b;
1729}
1730
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001731static bool need_peer_seq(struct drbd_conf *mdev)
1732{
1733 struct drbd_tconn *tconn = mdev->tconn;
1734
1735 /*
1736 * We only need to keep track of the last packet_seq number of our peer
1737 * if we are in dual-primary mode and we have the discard flag set; see
1738 * handle_write_conflicts().
1739 */
1740 return tconn->net_conf->two_primaries &&
1741 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1742}
1743
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001744static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001745{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001746 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001747
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001748 if (need_peer_seq(mdev)) {
1749 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001750 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1751 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001752 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001753 /* wake up only if we actually changed mdev->peer_seq */
1754 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001755 wake_up(&mdev->seq_wait);
1756 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001757}
1758
Philipp Reisnerb411b362009-09-25 16:07:19 -07001759/* Called from receive_Data.
1760 * Synchronize packets on sock with packets on msock.
1761 *
1762 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1763 * packet traveling on msock, they are still processed in the order they have
1764 * been sent.
1765 *
1766 * Note: we don't care for Ack packets overtaking P_DATA packets.
1767 *
1768 * In case packet_seq is larger than mdev->peer_seq number, there are
1769 * outstanding packets on the msock. We wait for them to arrive.
1770 * In case we are the logically next packet, we update mdev->peer_seq
1771 * ourselves. Correctly handles 32bit wrap around.
1772 *
1773 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1774 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1775 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1776 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1777 *
1778 * returns 0 if we may process the packet,
1779 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001780static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001781{
1782 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001784 int ret;
1785
1786 if (!need_peer_seq(mdev))
1787 return 0;
1788
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789 spin_lock(&mdev->peer_seq_lock);
1790 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001791 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1792 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1793 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001794 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001795 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796 if (signal_pending(current)) {
1797 ret = -ERESTARTSYS;
1798 break;
1799 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001800 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001801 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001802 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1803 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001805 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001807 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808 break;
1809 }
1810 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001812 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001813 return ret;
1814}
1815
Lars Ellenberg688593c2010-11-17 22:25:03 +01001816/* see also bio_flags_to_wire()
1817 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1818 * flags and back. We may replicate to other kernel versions. */
1819static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001820{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001821 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1822 (dpf & DP_FUA ? REQ_FUA : 0) |
1823 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1824 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001825}
1826
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001827static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1828 unsigned int size)
1829{
1830 struct drbd_interval *i;
1831
1832 repeat:
1833 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1834 struct drbd_request *req;
1835 struct bio_and_error m;
1836
1837 if (!i->local)
1838 continue;
1839 req = container_of(i, struct drbd_request, i);
1840 if (!(req->rq_state & RQ_POSTPONED))
1841 continue;
1842 req->rq_state &= ~RQ_POSTPONED;
1843 __req_mod(req, NEG_ACKED, &m);
1844 spin_unlock_irq(&mdev->tconn->req_lock);
1845 if (m.bio)
1846 complete_master_bio(mdev, &m);
1847 spin_lock_irq(&mdev->tconn->req_lock);
1848 goto repeat;
1849 }
1850}
1851
1852static int handle_write_conflicts(struct drbd_conf *mdev,
1853 struct drbd_peer_request *peer_req)
1854{
1855 struct drbd_tconn *tconn = mdev->tconn;
1856 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1857 sector_t sector = peer_req->i.sector;
1858 const unsigned int size = peer_req->i.size;
1859 struct drbd_interval *i;
1860 bool equal;
1861 int err;
1862
1863 /*
1864 * Inserting the peer request into the write_requests tree will prevent
1865 * new conflicting local requests from being added.
1866 */
1867 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1868
1869 repeat:
1870 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1871 if (i == &peer_req->i)
1872 continue;
1873
1874 if (!i->local) {
1875 /*
1876 * Our peer has sent a conflicting remote request; this
1877 * should not happen in a two-node setup. Wait for the
1878 * earlier peer request to complete.
1879 */
1880 err = drbd_wait_misc(mdev, i);
1881 if (err)
1882 goto out;
1883 goto repeat;
1884 }
1885
1886 equal = i->sector == sector && i->size == size;
1887 if (resolve_conflicts) {
1888 /*
1889 * If the peer request is fully contained within the
1890 * overlapping request, it can be discarded; otherwise,
1891 * it will be retried once all overlapping requests
1892 * have completed.
1893 */
1894 bool discard = i->sector <= sector && i->sector +
1895 (i->size >> 9) >= sector + (size >> 9);
1896
1897 if (!equal)
1898 dev_alert(DEV, "Concurrent writes detected: "
1899 "local=%llus +%u, remote=%llus +%u, "
1900 "assuming %s came first\n",
1901 (unsigned long long)i->sector, i->size,
1902 (unsigned long long)sector, size,
1903 discard ? "local" : "remote");
1904
1905 inc_unacked(mdev);
1906 peer_req->w.cb = discard ? e_send_discard_write :
1907 e_send_retry_write;
1908 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1909 wake_asender(mdev->tconn);
1910
1911 err = -ENOENT;
1912 goto out;
1913 } else {
1914 struct drbd_request *req =
1915 container_of(i, struct drbd_request, i);
1916
1917 if (!equal)
1918 dev_alert(DEV, "Concurrent writes detected: "
1919 "local=%llus +%u, remote=%llus +%u\n",
1920 (unsigned long long)i->sector, i->size,
1921 (unsigned long long)sector, size);
1922
1923 if (req->rq_state & RQ_LOCAL_PENDING ||
1924 !(req->rq_state & RQ_POSTPONED)) {
1925 /*
1926 * Wait for the node with the discard flag to
1927 * decide if this request will be discarded or
1928 * retried. Requests that are discarded will
1929 * disappear from the write_requests tree.
1930 *
1931 * In addition, wait for the conflicting
1932 * request to finish locally before submitting
1933 * the conflicting peer request.
1934 */
1935 err = drbd_wait_misc(mdev, &req->i);
1936 if (err) {
1937 _conn_request_state(mdev->tconn,
1938 NS(conn, C_TIMEOUT),
1939 CS_HARD);
1940 fail_postponed_requests(mdev, sector, size);
1941 goto out;
1942 }
1943 goto repeat;
1944 }
1945 /*
1946 * Remember to restart the conflicting requests after
1947 * the new peer request has completed.
1948 */
1949 peer_req->flags |= EE_RESTART_REQUESTS;
1950 }
1951 }
1952 err = 0;
1953
1954 out:
1955 if (err)
1956 drbd_remove_epoch_entry_interval(mdev, peer_req);
1957 return err;
1958}
1959
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001961static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1962 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963{
1964 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001965 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001966 struct p_data *p = &mdev->tconn->data.rbuf.data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001967 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001968 int rw = WRITE;
1969 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001970 int err;
1971
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001973 int err2;
1974
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001975 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001976 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001978 err2 = drbd_drain_block(mdev, data_size);
1979 if (!err)
1980 err = err2;
1981 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982 }
1983
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001984 /*
1985 * Corresponding put_ldev done either below (on various errors), or in
1986 * drbd_peer_request_endio, if we successfully submit the data at the
1987 * end of this function.
1988 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989
1990 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001991 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1992 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001993 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001994 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001995 }
1996
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001997 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001998
Lars Ellenberg688593c2010-11-17 22:25:03 +01001999 dp_flags = be32_to_cpu(p->dp_flags);
2000 rw |= wire_flags_to_bio(mdev, dp_flags);
2001
2002 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002003 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002004
Philipp Reisnerb411b362009-09-25 16:07:19 -07002005 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002006 peer_req->epoch = mdev->current_epoch;
2007 atomic_inc(&peer_req->epoch->epoch_size);
2008 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009 spin_unlock(&mdev->epoch_lock);
2010
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002011 if (mdev->tconn->net_conf->two_primaries) {
2012 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2013 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002015 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002016 err = handle_write_conflicts(mdev, peer_req);
2017 if (err) {
2018 spin_unlock_irq(&mdev->tconn->req_lock);
2019 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002020 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002021 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002023 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002025 } else
2026 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002027 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002028 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002029
Philipp Reisner89e58e72011-01-19 13:12:45 +01002030 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 case DRBD_PROT_C:
2032 inc_unacked(mdev);
2033 /* corresponding dec_unacked() in e_end_block()
2034 * respective _drbd_clear_done_ee */
2035 break;
2036 case DRBD_PROT_B:
2037 /* I really don't like it that the receiver thread
2038 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002039 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 break;
2041 case DRBD_PROT_A:
2042 /* nothing to do */
2043 break;
2044 }
2045
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002046 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002047 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002048 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2049 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2050 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2051 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002052 }
2053
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002054 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2055 if (!err)
2056 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002057
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002058 /* don't care for the reason here */
2059 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002060 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002061 list_del(&peer_req->w.list);
2062 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002063 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002064 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
2065 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002066
Philipp Reisnerb411b362009-09-25 16:07:19 -07002067out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002068 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002069 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002070 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002071 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002072}
2073
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002074/* We may throttle resync, if the lower device seems to be busy,
2075 * and current sync rate is above c_min_rate.
2076 *
2077 * To decide whether or not the lower device is busy, we use a scheme similar
2078 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2079 * (more than 64 sectors) of activity we cannot account for with our own resync
2080 * activity, it obviously is "busy".
2081 *
2082 * The current sync rate used here uses only the most recent two step marks,
2083 * to have a short time average so we can react faster.
2084 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002085int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002086{
2087 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2088 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002089 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002090 int curr_events;
2091 int throttle = 0;
2092
2093 /* feature disabled? */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002094 if (mdev->ldev->dc.c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002095 return 0;
2096
Philipp Reisnere3555d82010-11-07 15:56:29 +01002097 spin_lock_irq(&mdev->al_lock);
2098 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2099 if (tmp) {
2100 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2101 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2102 spin_unlock_irq(&mdev->al_lock);
2103 return 0;
2104 }
2105 /* Do not slow down if app IO is already waiting for this extent */
2106 }
2107 spin_unlock_irq(&mdev->al_lock);
2108
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002109 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2110 (int)part_stat_read(&disk->part0, sectors[1]) -
2111 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002112
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002113 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2114 unsigned long rs_left;
2115 int i;
2116
2117 mdev->rs_last_events = curr_events;
2118
2119 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2120 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002121 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2122
2123 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2124 rs_left = mdev->ov_left;
2125 else
2126 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002127
2128 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2129 if (!dt)
2130 dt++;
2131 db = mdev->rs_mark_left[i] - rs_left;
2132 dbdt = Bit2KB(db/dt);
2133
Lars Ellenbergf3990022011-03-23 14:31:09 +01002134 if (dbdt > mdev->ldev->dc.c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002135 throttle = 1;
2136 }
2137 return throttle;
2138}
2139
2140
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002141static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2142 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143{
2144 sector_t sector;
2145 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002146 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002147 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002148 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002149 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002150 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151
2152 sector = be64_to_cpu(p->sector);
2153 size = be32_to_cpu(p->blksize);
2154
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002155 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2157 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002158 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002159 }
2160 if (sector + (size>>9) > capacity) {
2161 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2162 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002163 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002164 }
2165
2166 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002167 verb = 1;
2168 switch (cmd) {
2169 case P_DATA_REQUEST:
2170 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2171 break;
2172 case P_RS_DATA_REQUEST:
2173 case P_CSUM_RS_REQUEST:
2174 case P_OV_REQUEST:
2175 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2176 break;
2177 case P_OV_REPLY:
2178 verb = 0;
2179 dec_rs_pending(mdev);
2180 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2181 break;
2182 default:
2183 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2184 cmdname(cmd));
2185 }
2186 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187 dev_err(DEV, "Can not satisfy peer's read request, "
2188 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002189
Lars Ellenberga821cc42010-09-06 12:31:37 +02002190 /* drain possibly payload */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002191 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002192 }
2193
2194 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2195 * "criss-cross" setup, that might cause write-out on some other DRBD,
2196 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002197 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2198 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002200 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002201 }
2202
Philipp Reisner02918be2010-08-20 14:35:10 +02002203 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002204 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002205 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002207 /* application IO, don't drbd_rs_begin_io */
2208 goto submit;
2209
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002211 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002212 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002213 /* used in the sector offset progress display */
2214 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002215 break;
2216
2217 case P_OV_REPLY:
2218 case P_CSUM_RS_REQUEST:
2219 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2221 if (!di)
2222 goto out_free_e;
2223
2224 di->digest_size = digest_size;
2225 di->digest = (((char *)di)+sizeof(struct digest_info));
2226
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002227 peer_req->digest = di;
2228 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002229
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002230 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 goto out_free_e;
2232
Philipp Reisner02918be2010-08-20 14:35:10 +02002233 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002234 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002235 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002236 /* used in the sector offset progress display */
2237 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002238 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002239 /* track progress, we may need to throttle */
2240 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002241 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002242 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002243 /* drbd_rs_begin_io done when we sent this request,
2244 * but accounting still needs to be done. */
2245 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002246 }
2247 break;
2248
2249 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002251 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002252 unsigned long now = jiffies;
2253 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002254 mdev->ov_start_sector = sector;
2255 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002256 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2257 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002258 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2259 mdev->rs_mark_left[i] = mdev->ov_left;
2260 mdev->rs_mark_time[i] = now;
2261 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002262 dev_info(DEV, "Online Verify start sector: %llu\n",
2263 (unsigned long long)sector);
2264 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002265 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002266 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002267 break;
2268
Philipp Reisnerb411b362009-09-25 16:07:19 -07002269 default:
2270 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002271 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002273 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002274 }
2275
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002276 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2277 * wrt the receiver, but it is not as straightforward as it may seem.
2278 * Various places in the resync start and stop logic assume resync
2279 * requests are processed in order, requeuing this on the worker thread
2280 * introduces a bunch of new code for synchronization between threads.
2281 *
2282 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2283 * "forever", throttling after drbd_rs_begin_io will lock that extent
2284 * for application writes for the same time. For now, just throttle
2285 * here, where the rest of the code expects the receiver to sleep for
2286 * a while, anyways.
2287 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002289 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2290 * this defers syncer requests for some time, before letting at least
2291 * on request through. The resync controller on the receiving side
2292 * will adapt to the incoming rate accordingly.
2293 *
2294 * We cannot throttle here if remote is Primary/SyncTarget:
2295 * we would also throttle its application reads.
2296 * In that case, throttling is done on the SyncTarget only.
2297 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002298 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2299 schedule_timeout_uninterruptible(HZ/10);
2300 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002301 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002302
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002303submit_for_resync:
2304 atomic_add(size >> 9, &mdev->rs_sect_ev);
2305
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002306submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002307 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002308 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002309 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002310 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002312 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002313 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002314
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002315 /* don't care for the reason here */
2316 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002317 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002318 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002319 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002320 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2321
Philipp Reisnerb411b362009-09-25 16:07:19 -07002322out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002323 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002324 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002325 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326}
2327
2328static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2329{
2330 int self, peer, rv = -100;
2331 unsigned long ch_self, ch_peer;
2332
2333 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2334 peer = mdev->p_uuid[UI_BITMAP] & 1;
2335
2336 ch_peer = mdev->p_uuid[UI_SIZE];
2337 ch_self = mdev->comm_bm_set;
2338
Philipp Reisner89e58e72011-01-19 13:12:45 +01002339 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002340 case ASB_CONSENSUS:
2341 case ASB_DISCARD_SECONDARY:
2342 case ASB_CALL_HELPER:
2343 dev_err(DEV, "Configuration error.\n");
2344 break;
2345 case ASB_DISCONNECT:
2346 break;
2347 case ASB_DISCARD_YOUNGER_PRI:
2348 if (self == 0 && peer == 1) {
2349 rv = -1;
2350 break;
2351 }
2352 if (self == 1 && peer == 0) {
2353 rv = 1;
2354 break;
2355 }
2356 /* Else fall through to one of the other strategies... */
2357 case ASB_DISCARD_OLDER_PRI:
2358 if (self == 0 && peer == 1) {
2359 rv = 1;
2360 break;
2361 }
2362 if (self == 1 && peer == 0) {
2363 rv = -1;
2364 break;
2365 }
2366 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002367 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368 "Using discard-least-changes instead\n");
2369 case ASB_DISCARD_ZERO_CHG:
2370 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002371 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002372 ? -1 : 1;
2373 break;
2374 } else {
2375 if (ch_peer == 0) { rv = 1; break; }
2376 if (ch_self == 0) { rv = -1; break; }
2377 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002378 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002379 break;
2380 case ASB_DISCARD_LEAST_CHG:
2381 if (ch_self < ch_peer)
2382 rv = -1;
2383 else if (ch_self > ch_peer)
2384 rv = 1;
2385 else /* ( ch_self == ch_peer ) */
2386 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002387 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002388 ? -1 : 1;
2389 break;
2390 case ASB_DISCARD_LOCAL:
2391 rv = -1;
2392 break;
2393 case ASB_DISCARD_REMOTE:
2394 rv = 1;
2395 }
2396
2397 return rv;
2398}
2399
2400static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2401{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002402 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002403
Philipp Reisner89e58e72011-01-19 13:12:45 +01002404 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002405 case ASB_DISCARD_YOUNGER_PRI:
2406 case ASB_DISCARD_OLDER_PRI:
2407 case ASB_DISCARD_LEAST_CHG:
2408 case ASB_DISCARD_LOCAL:
2409 case ASB_DISCARD_REMOTE:
2410 dev_err(DEV, "Configuration error.\n");
2411 break;
2412 case ASB_DISCONNECT:
2413 break;
2414 case ASB_CONSENSUS:
2415 hg = drbd_asb_recover_0p(mdev);
2416 if (hg == -1 && mdev->state.role == R_SECONDARY)
2417 rv = hg;
2418 if (hg == 1 && mdev->state.role == R_PRIMARY)
2419 rv = hg;
2420 break;
2421 case ASB_VIOLENTLY:
2422 rv = drbd_asb_recover_0p(mdev);
2423 break;
2424 case ASB_DISCARD_SECONDARY:
2425 return mdev->state.role == R_PRIMARY ? 1 : -1;
2426 case ASB_CALL_HELPER:
2427 hg = drbd_asb_recover_0p(mdev);
2428 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002429 enum drbd_state_rv rv2;
2430
2431 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002432 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2433 * we might be here in C_WF_REPORT_PARAMS which is transient.
2434 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002435 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2436 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 drbd_khelper(mdev, "pri-lost-after-sb");
2438 } else {
2439 dev_warn(DEV, "Successfully gave up primary role.\n");
2440 rv = hg;
2441 }
2442 } else
2443 rv = hg;
2444 }
2445
2446 return rv;
2447}
2448
2449static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2450{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002451 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002452
Philipp Reisner89e58e72011-01-19 13:12:45 +01002453 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454 case ASB_DISCARD_YOUNGER_PRI:
2455 case ASB_DISCARD_OLDER_PRI:
2456 case ASB_DISCARD_LEAST_CHG:
2457 case ASB_DISCARD_LOCAL:
2458 case ASB_DISCARD_REMOTE:
2459 case ASB_CONSENSUS:
2460 case ASB_DISCARD_SECONDARY:
2461 dev_err(DEV, "Configuration error.\n");
2462 break;
2463 case ASB_VIOLENTLY:
2464 rv = drbd_asb_recover_0p(mdev);
2465 break;
2466 case ASB_DISCONNECT:
2467 break;
2468 case ASB_CALL_HELPER:
2469 hg = drbd_asb_recover_0p(mdev);
2470 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002471 enum drbd_state_rv rv2;
2472
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2474 * we might be here in C_WF_REPORT_PARAMS which is transient.
2475 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002476 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2477 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002478 drbd_khelper(mdev, "pri-lost-after-sb");
2479 } else {
2480 dev_warn(DEV, "Successfully gave up primary role.\n");
2481 rv = hg;
2482 }
2483 } else
2484 rv = hg;
2485 }
2486
2487 return rv;
2488}
2489
2490static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2491 u64 bits, u64 flags)
2492{
2493 if (!uuid) {
2494 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2495 return;
2496 }
2497 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2498 text,
2499 (unsigned long long)uuid[UI_CURRENT],
2500 (unsigned long long)uuid[UI_BITMAP],
2501 (unsigned long long)uuid[UI_HISTORY_START],
2502 (unsigned long long)uuid[UI_HISTORY_END],
2503 (unsigned long long)bits,
2504 (unsigned long long)flags);
2505}
2506
2507/*
2508 100 after split brain try auto recover
2509 2 C_SYNC_SOURCE set BitMap
2510 1 C_SYNC_SOURCE use BitMap
2511 0 no Sync
2512 -1 C_SYNC_TARGET use BitMap
2513 -2 C_SYNC_TARGET set BitMap
2514 -100 after split brain, disconnect
2515-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002516-1091 requires proto 91
2517-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002518 */
2519static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2520{
2521 u64 self, peer;
2522 int i, j;
2523
2524 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2525 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2526
2527 *rule_nr = 10;
2528 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2529 return 0;
2530
2531 *rule_nr = 20;
2532 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2533 peer != UUID_JUST_CREATED)
2534 return -2;
2535
2536 *rule_nr = 30;
2537 if (self != UUID_JUST_CREATED &&
2538 (peer == UUID_JUST_CREATED || peer == (u64)0))
2539 return 2;
2540
2541 if (self == peer) {
2542 int rct, dc; /* roles at crash time */
2543
2544 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2545
Philipp Reisner31890f42011-01-19 14:12:51 +01002546 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002547 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002548
2549 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2550 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2551 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2552 drbd_uuid_set_bm(mdev, 0UL);
2553
2554 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2555 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2556 *rule_nr = 34;
2557 } else {
2558 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2559 *rule_nr = 36;
2560 }
2561
2562 return 1;
2563 }
2564
2565 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2566
Philipp Reisner31890f42011-01-19 14:12:51 +01002567 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002568 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002569
2570 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2571 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2572 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2573
2574 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2575 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2576 mdev->p_uuid[UI_BITMAP] = 0UL;
2577
2578 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2579 *rule_nr = 35;
2580 } else {
2581 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2582 *rule_nr = 37;
2583 }
2584
2585 return -1;
2586 }
2587
2588 /* Common power [off|failure] */
2589 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2590 (mdev->p_uuid[UI_FLAGS] & 2);
2591 /* lowest bit is set when we were primary,
2592 * next bit (weight 2) is set when peer was primary */
2593 *rule_nr = 40;
2594
2595 switch (rct) {
2596 case 0: /* !self_pri && !peer_pri */ return 0;
2597 case 1: /* self_pri && !peer_pri */ return 1;
2598 case 2: /* !self_pri && peer_pri */ return -1;
2599 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002600 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002601 return dc ? -1 : 1;
2602 }
2603 }
2604
2605 *rule_nr = 50;
2606 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2607 if (self == peer)
2608 return -1;
2609
2610 *rule_nr = 51;
2611 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2612 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002613 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002614 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2615 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2616 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002617 /* The last P_SYNC_UUID did not get though. Undo the last start of
2618 resync as sync source modifications of the peer's UUIDs. */
2619
Philipp Reisner31890f42011-01-19 14:12:51 +01002620 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002621 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622
2623 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2624 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002625
2626 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2627 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2628
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629 return -1;
2630 }
2631 }
2632
2633 *rule_nr = 60;
2634 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2635 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2636 peer = mdev->p_uuid[i] & ~((u64)1);
2637 if (self == peer)
2638 return -2;
2639 }
2640
2641 *rule_nr = 70;
2642 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2643 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2644 if (self == peer)
2645 return 1;
2646
2647 *rule_nr = 71;
2648 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2649 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002650 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002651 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2652 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2653 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002654 /* The last P_SYNC_UUID did not get though. Undo the last start of
2655 resync as sync source modifications of our UUIDs. */
2656
Philipp Reisner31890f42011-01-19 14:12:51 +01002657 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002658 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002659
2660 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2661 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2662
Philipp Reisner4a23f262011-01-11 17:42:17 +01002663 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002664 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2665 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2666
2667 return 1;
2668 }
2669 }
2670
2671
2672 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002673 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2675 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2676 if (self == peer)
2677 return 2;
2678 }
2679
2680 *rule_nr = 90;
2681 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2682 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2683 if (self == peer && self != ((u64)0))
2684 return 100;
2685
2686 *rule_nr = 100;
2687 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2688 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2689 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2690 peer = mdev->p_uuid[j] & ~((u64)1);
2691 if (self == peer)
2692 return -100;
2693 }
2694 }
2695
2696 return -1000;
2697}
2698
2699/* drbd_sync_handshake() returns the new conn state on success, or
2700 CONN_MASK (-1) on failure.
2701 */
2702static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2703 enum drbd_disk_state peer_disk) __must_hold(local)
2704{
2705 int hg, rule_nr;
2706 enum drbd_conns rv = C_MASK;
2707 enum drbd_disk_state mydisk;
2708
2709 mydisk = mdev->state.disk;
2710 if (mydisk == D_NEGOTIATING)
2711 mydisk = mdev->new_state_tmp.disk;
2712
2713 dev_info(DEV, "drbd_sync_handshake:\n");
2714 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2715 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2716 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2717
2718 hg = drbd_uuid_compare(mdev, &rule_nr);
2719
2720 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2721
2722 if (hg == -1000) {
2723 dev_alert(DEV, "Unrelated data, aborting!\n");
2724 return C_MASK;
2725 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002726 if (hg < -1000) {
2727 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 return C_MASK;
2729 }
2730
2731 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2732 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2733 int f = (hg == -100) || abs(hg) == 2;
2734 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2735 if (f)
2736 hg = hg*2;
2737 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2738 hg > 0 ? "source" : "target");
2739 }
2740
Adam Gandelman3a11a482010-04-08 16:48:23 -07002741 if (abs(hg) == 100)
2742 drbd_khelper(mdev, "initial-split-brain");
2743
Philipp Reisner89e58e72011-01-19 13:12:45 +01002744 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745 int pcount = (mdev->state.role == R_PRIMARY)
2746 + (peer_role == R_PRIMARY);
2747 int forced = (hg == -100);
2748
2749 switch (pcount) {
2750 case 0:
2751 hg = drbd_asb_recover_0p(mdev);
2752 break;
2753 case 1:
2754 hg = drbd_asb_recover_1p(mdev);
2755 break;
2756 case 2:
2757 hg = drbd_asb_recover_2p(mdev);
2758 break;
2759 }
2760 if (abs(hg) < 100) {
2761 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2762 "automatically solved. Sync from %s node\n",
2763 pcount, (hg < 0) ? "peer" : "this");
2764 if (forced) {
2765 dev_warn(DEV, "Doing a full sync, since"
2766 " UUIDs where ambiguous.\n");
2767 hg = hg*2;
2768 }
2769 }
2770 }
2771
2772 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002773 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002774 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002775 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 hg = 1;
2777
2778 if (abs(hg) < 100)
2779 dev_warn(DEV, "Split-Brain detected, manually solved. "
2780 "Sync from %s node\n",
2781 (hg < 0) ? "peer" : "this");
2782 }
2783
2784 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002785 /* FIXME this log message is not correct if we end up here
2786 * after an attempted attach on a diskless node.
2787 * We just refuse to attach -- well, we drop the "connection"
2788 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002789 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 drbd_khelper(mdev, "split-brain");
2791 return C_MASK;
2792 }
2793
2794 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2795 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2796 return C_MASK;
2797 }
2798
2799 if (hg < 0 && /* by intention we do not use mydisk here. */
2800 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002801 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002802 case ASB_CALL_HELPER:
2803 drbd_khelper(mdev, "pri-lost");
2804 /* fall through */
2805 case ASB_DISCONNECT:
2806 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2807 return C_MASK;
2808 case ASB_VIOLENTLY:
2809 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2810 "assumption\n");
2811 }
2812 }
2813
Philipp Reisner8169e412011-03-15 18:40:27 +01002814 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002815 if (hg == 0)
2816 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2817 else
2818 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2819 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2820 abs(hg) >= 2 ? "full" : "bit-map based");
2821 return C_MASK;
2822 }
2823
Philipp Reisnerb411b362009-09-25 16:07:19 -07002824 if (abs(hg) >= 2) {
2825 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002826 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2827 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828 return C_MASK;
2829 }
2830
2831 if (hg > 0) { /* become sync source. */
2832 rv = C_WF_BITMAP_S;
2833 } else if (hg < 0) { /* become sync target */
2834 rv = C_WF_BITMAP_T;
2835 } else {
2836 rv = C_CONNECTED;
2837 if (drbd_bm_total_weight(mdev)) {
2838 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2839 drbd_bm_total_weight(mdev));
2840 }
2841 }
2842
2843 return rv;
2844}
2845
2846/* returns 1 if invalid */
2847static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2848{
2849 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2850 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2851 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2852 return 0;
2853
2854 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2855 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2856 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2857 return 1;
2858
2859 /* everything else is valid if they are equal on both sides. */
2860 if (peer == self)
2861 return 0;
2862
2863 /* everything es is invalid. */
2864 return 1;
2865}
2866
Philipp Reisner72046242011-03-15 18:51:47 +01002867static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd,
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002868 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002869{
Philipp Reisner72046242011-03-15 18:51:47 +01002870 struct p_protocol *p = &tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002872 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2874
Philipp Reisnerb411b362009-09-25 16:07:19 -07002875 p_proto = be32_to_cpu(p->protocol);
2876 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2877 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2878 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002879 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002880 cf = be32_to_cpu(p->conn_flags);
2881 p_want_lose = cf & CF_WANT_LOSE;
2882
Philipp Reisner72046242011-03-15 18:51:47 +01002883 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002884
2885 if (cf & CF_DRY_RUN)
Philipp Reisner72046242011-03-15 18:51:47 +01002886 set_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887
Philipp Reisner72046242011-03-15 18:51:47 +01002888 if (p_proto != tconn->net_conf->wire_protocol) {
2889 conn_err(tconn, "incompatible communication protocols\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890 goto disconnect;
2891 }
2892
Philipp Reisner72046242011-03-15 18:51:47 +01002893 if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
2894 conn_err(tconn, "incompatible after-sb-0pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 goto disconnect;
2896 }
2897
Philipp Reisner72046242011-03-15 18:51:47 +01002898 if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
2899 conn_err(tconn, "incompatible after-sb-1pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900 goto disconnect;
2901 }
2902
Philipp Reisner72046242011-03-15 18:51:47 +01002903 if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
2904 conn_err(tconn, "incompatible after-sb-2pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905 goto disconnect;
2906 }
2907
Philipp Reisner72046242011-03-15 18:51:47 +01002908 if (p_want_lose && tconn->net_conf->want_lose) {
2909 conn_err(tconn, "both sides have the 'want_lose' flag set\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002910 goto disconnect;
2911 }
2912
Philipp Reisner72046242011-03-15 18:51:47 +01002913 if (p_two_primaries != tconn->net_conf->two_primaries) {
2914 conn_err(tconn, "incompatible setting of the two-primaries options\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002915 goto disconnect;
2916 }
2917
Philipp Reisner72046242011-03-15 18:51:47 +01002918 if (tconn->agreed_pro_version >= 87) {
2919 unsigned char *my_alg = tconn->net_conf->integrity_alg;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002920 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002921
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002922 err = drbd_recv_all(tconn, p_integrity_alg, data_size);
2923 if (err)
2924 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002925
2926 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2927 if (strcmp(p_integrity_alg, my_alg)) {
Philipp Reisner72046242011-03-15 18:51:47 +01002928 conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929 goto disconnect;
2930 }
Philipp Reisner72046242011-03-15 18:51:47 +01002931 conn_info(tconn, "data-integrity-alg: %s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2933 }
2934
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002935 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002936
2937disconnect:
Philipp Reisner72046242011-03-15 18:51:47 +01002938 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002939 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002940}
2941
2942/* helper function
2943 * input: alg name, feature name
2944 * return: NULL (alg name was "")
2945 * ERR_PTR(error) if something goes wrong
2946 * or the crypto hash ptr, if it worked out ok. */
2947struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2948 const char *alg, const char *name)
2949{
2950 struct crypto_hash *tfm;
2951
2952 if (!alg[0])
2953 return NULL;
2954
2955 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2956 if (IS_ERR(tfm)) {
2957 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2958 alg, name, PTR_ERR(tfm));
2959 return tfm;
2960 }
2961 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2962 crypto_free_hash(tfm);
2963 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2964 return ERR_PTR(-EINVAL);
2965 }
2966 return tfm;
2967}
2968
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002969static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2970 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002972 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973 unsigned int header_size, data_size, exp_max_sz;
2974 struct crypto_hash *verify_tfm = NULL;
2975 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002976 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002977 int *rs_plan_s = NULL;
2978 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002979 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002980
2981 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2982 : apv == 88 ? sizeof(struct p_rs_param)
2983 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002984 : apv <= 94 ? sizeof(struct p_rs_param_89)
2985 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986
Philipp Reisner02918be2010-08-20 14:35:10 +02002987 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002988 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002989 packet_size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002990 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991 }
2992
2993 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002994 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002995 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002996 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002997 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002998 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002999 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003000 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003001 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003002 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003 D_ASSERT(data_size == 0);
3004 }
3005
3006 /* initialize verify_alg and csums_alg */
3007 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3008
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003009 err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size);
3010 if (err)
3011 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012
Lars Ellenbergf3990022011-03-23 14:31:09 +01003013 if (get_ldev(mdev)) {
3014 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3015 put_ldev(mdev);
3016 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017
3018 if (apv >= 88) {
3019 if (apv == 88) {
3020 if (data_size > SHARED_SECRET_MAX) {
3021 dev_err(DEV, "verify-alg too long, "
3022 "peer wants %u, accepting only %u byte\n",
3023 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003024 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 }
3026
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003027 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
3028 if (err)
3029 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003030
3031 /* we expect NUL terminated string */
3032 /* but just in case someone tries to be evil */
3033 D_ASSERT(p->verify_alg[data_size-1] == 0);
3034 p->verify_alg[data_size-1] = 0;
3035
3036 } else /* apv >= 89 */ {
3037 /* we still expect NUL terminated strings */
3038 /* but just in case someone tries to be evil */
3039 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3040 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3041 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3042 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3043 }
3044
Lars Ellenbergf3990022011-03-23 14:31:09 +01003045 if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3047 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003048 mdev->tconn->net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003049 goto disconnect;
3050 }
3051 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3052 p->verify_alg, "verify-alg");
3053 if (IS_ERR(verify_tfm)) {
3054 verify_tfm = NULL;
3055 goto disconnect;
3056 }
3057 }
3058
Lars Ellenbergf3990022011-03-23 14:31:09 +01003059 if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003060 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3061 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003062 mdev->tconn->net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003063 goto disconnect;
3064 }
3065 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3066 p->csums_alg, "csums-alg");
3067 if (IS_ERR(csums_tfm)) {
3068 csums_tfm = NULL;
3069 goto disconnect;
3070 }
3071 }
3072
Lars Ellenbergf3990022011-03-23 14:31:09 +01003073 if (apv > 94 && get_ldev(mdev)) {
3074 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3075 mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3076 mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
3077 mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
3078 mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003079
Lars Ellenbergf3990022011-03-23 14:31:09 +01003080 fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +02003081 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3082 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3083 if (!rs_plan_s) {
3084 dev_err(DEV, "kmalloc of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003085 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003086 goto disconnect;
3087 }
3088 }
Lars Ellenbergf3990022011-03-23 14:31:09 +01003089 put_ldev(mdev);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003090 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091
3092 spin_lock(&mdev->peer_seq_lock);
3093 /* lock against drbd_nl_syncer_conf() */
3094 if (verify_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003095 strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
3096 mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3097 crypto_free_hash(mdev->tconn->verify_tfm);
3098 mdev->tconn->verify_tfm = verify_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3100 }
3101 if (csums_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003102 strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
3103 mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3104 crypto_free_hash(mdev->tconn->csums_tfm);
3105 mdev->tconn->csums_tfm = csums_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3107 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003108 if (fifo_size != mdev->rs_plan_s.size) {
3109 kfree(mdev->rs_plan_s.values);
3110 mdev->rs_plan_s.values = rs_plan_s;
3111 mdev->rs_plan_s.size = fifo_size;
3112 mdev->rs_planed = 0;
3113 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114 spin_unlock(&mdev->peer_seq_lock);
3115 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003116 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118disconnect:
3119 /* just for completeness: actually not needed,
3120 * as this is not reached if csums_tfm was ok. */
3121 crypto_free_hash(csums_tfm);
3122 /* but free the verify_tfm again, if csums_tfm did not work out */
3123 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003124 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003125 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126}
3127
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128/* warn if the arguments differ by more than 12.5% */
3129static void warn_if_differ_considerably(struct drbd_conf *mdev,
3130 const char *s, sector_t a, sector_t b)
3131{
3132 sector_t d;
3133 if (a == 0 || b == 0)
3134 return;
3135 d = (a > b) ? (a - b) : (b - a);
3136 if (d > (a>>3) || d > (b>>3))
3137 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3138 (unsigned long long)a, (unsigned long long)b);
3139}
3140
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003141static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3142 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003144 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003145 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003146 sector_t p_size, p_usize, my_usize;
3147 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003148 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003149
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 p_size = be64_to_cpu(p->d_size);
3151 p_usize = be64_to_cpu(p->u_size);
3152
Philipp Reisnerb411b362009-09-25 16:07:19 -07003153 /* just store the peer's disk size for now.
3154 * we still need to figure out whether we accept that. */
3155 mdev->p_size = p_size;
3156
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 if (get_ldev(mdev)) {
3158 warn_if_differ_considerably(mdev, "lower level device sizes",
3159 p_size, drbd_get_max_capacity(mdev->ldev));
3160 warn_if_differ_considerably(mdev, "user requested size",
3161 p_usize, mdev->ldev->dc.disk_size);
3162
3163 /* if this is the first connect, or an otherwise expected
3164 * param exchange, choose the minimum */
3165 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3166 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3167 p_usize);
3168
3169 my_usize = mdev->ldev->dc.disk_size;
3170
3171 if (mdev->ldev->dc.disk_size != p_usize) {
3172 mdev->ldev->dc.disk_size = p_usize;
3173 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3174 (unsigned long)mdev->ldev->dc.disk_size);
3175 }
3176
3177 /* Never shrink a device with usable data during connect.
3178 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003179 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003180 drbd_get_capacity(mdev->this_bdev) &&
3181 mdev->state.disk >= D_OUTDATED &&
3182 mdev->state.conn < C_CONNECTED) {
3183 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003184 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003185 mdev->ldev->dc.disk_size = my_usize;
3186 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003187 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003188 }
3189 put_ldev(mdev);
3190 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191
Philipp Reisnere89b5912010-03-24 17:11:33 +01003192 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003194 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003195 put_ldev(mdev);
3196 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003197 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198 drbd_md_sync(mdev);
3199 } else {
3200 /* I am diskless, need to accept the peer's size. */
3201 drbd_set_my_capacity(mdev, p_size);
3202 }
3203
Philipp Reisner99432fc2011-05-20 16:39:13 +02003204 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3205 drbd_reconsider_max_bio_size(mdev);
3206
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207 if (get_ldev(mdev)) {
3208 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3209 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3210 ldsc = 1;
3211 }
3212
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213 put_ldev(mdev);
3214 }
3215
3216 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3217 if (be64_to_cpu(p->c_size) !=
3218 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3219 /* we have different sizes, probably peer
3220 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003221 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222 }
3223 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3224 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3225 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003226 mdev->state.disk >= D_INCONSISTENT) {
3227 if (ddsf & DDSF_NO_RESYNC)
3228 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3229 else
3230 resync_after_online_grow(mdev);
3231 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3233 }
3234 }
3235
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003236 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237}
3238
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003239static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3240 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003242 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003244 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003245
Philipp Reisnerb411b362009-09-25 16:07:19 -07003246 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3247
3248 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3249 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3250
3251 kfree(mdev->p_uuid);
3252 mdev->p_uuid = p_uuid;
3253
3254 if (mdev->state.conn < C_CONNECTED &&
3255 mdev->state.disk < D_INCONSISTENT &&
3256 mdev->state.role == R_PRIMARY &&
3257 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3258 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3259 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003260 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003261 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 }
3263
3264 if (get_ldev(mdev)) {
3265 int skip_initial_sync =
3266 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003267 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003268 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3269 (p_uuid[UI_FLAGS] & 8);
3270 if (skip_initial_sync) {
3271 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3272 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003273 "clear_n_write from receive_uuids",
3274 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003275 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3276 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3277 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3278 CS_VERBOSE, NULL);
3279 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003280 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281 }
3282 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003283 } else if (mdev->state.disk < D_INCONSISTENT &&
3284 mdev->state.role == R_PRIMARY) {
3285 /* I am a diskless primary, the peer just created a new current UUID
3286 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003287 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288 }
3289
3290 /* Before we test for the disk state, we should wait until an eventually
3291 ongoing cluster wide state change is finished. That is important if
3292 we are primary and are detaching from our disk. We need to see the
3293 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003294 mutex_lock(mdev->state_mutex);
3295 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003297 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3298
3299 if (updated_uuids)
3300 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003302 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303}
3304
3305/**
3306 * convert_state() - Converts the peer's view of the cluster state to our point of view
3307 * @ps: The state as seen by the peer.
3308 */
3309static union drbd_state convert_state(union drbd_state ps)
3310{
3311 union drbd_state ms;
3312
3313 static enum drbd_conns c_tab[] = {
3314 [C_CONNECTED] = C_CONNECTED,
3315
3316 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3317 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3318 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3319 [C_VERIFY_S] = C_VERIFY_T,
3320 [C_MASK] = C_MASK,
3321 };
3322
3323 ms.i = ps.i;
3324
3325 ms.conn = c_tab[ps.conn];
3326 ms.peer = ps.role;
3327 ms.role = ps.peer;
3328 ms.pdsk = ps.disk;
3329 ms.disk = ps.pdsk;
3330 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3331
3332 return ms;
3333}
3334
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003335static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3336 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003337{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003338 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003339 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003340 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 mask.i = be32_to_cpu(p->mask);
3343 val.i = be32_to_cpu(p->val);
3344
Philipp Reisner25703f82011-02-07 14:35:25 +01003345 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003346 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003348 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003349 }
3350
3351 mask = convert_state(mask);
3352 val = convert_state(val);
3353
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003354 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3355 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003356
Philipp Reisnerb411b362009-09-25 16:07:19 -07003357 drbd_md_sync(mdev);
3358
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003359 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003360}
3361
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003362static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd,
3363 unsigned int data_size)
3364{
3365 struct p_req_state *p = &tconn->data.rbuf.req_state;
3366 union drbd_state mask, val;
3367 enum drbd_state_rv rv;
3368
3369 mask.i = be32_to_cpu(p->mask);
3370 val.i = be32_to_cpu(p->val);
3371
3372 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3373 mutex_is_locked(&tconn->cstate_mutex)) {
3374 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003375 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003376 }
3377
3378 mask = convert_state(mask);
3379 val = convert_state(val);
3380
3381 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3382 conn_send_sr_reply(tconn, rv);
3383
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003384 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003385}
3386
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003387static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3388 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003389{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003390 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003391 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003392 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003393 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003394 int rv;
3395
Philipp Reisnerb411b362009-09-25 16:07:19 -07003396 peer_state.i = be32_to_cpu(p->state);
3397
3398 real_peer_disk = peer_state.disk;
3399 if (peer_state.disk == D_NEGOTIATING) {
3400 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3401 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3402 }
3403
Philipp Reisner87eeee42011-01-19 14:16:30 +01003404 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003406 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003407 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003408
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003409 /* peer says his disk is uptodate, while we think it is inconsistent,
3410 * and this happens while we think we have a sync going on. */
3411 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3412 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3413 /* If we are (becoming) SyncSource, but peer is still in sync
3414 * preparation, ignore its uptodate-ness to avoid flapping, it
3415 * will change to inconsistent once the peer reaches active
3416 * syncing states.
3417 * It may have changed syncer-paused flags, however, so we
3418 * cannot ignore this completely. */
3419 if (peer_state.conn > C_CONNECTED &&
3420 peer_state.conn < C_SYNC_SOURCE)
3421 real_peer_disk = D_INCONSISTENT;
3422
3423 /* if peer_state changes to connected at the same time,
3424 * it explicitly notifies us that it finished resync.
3425 * Maybe we should finish it up, too? */
3426 else if (os.conn >= C_SYNC_SOURCE &&
3427 peer_state.conn == C_CONNECTED) {
3428 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3429 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003430 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003431 }
3432 }
3433
3434 /* peer says his disk is inconsistent, while we think it is uptodate,
3435 * and this happens while the peer still thinks we have a sync going on,
3436 * but we think we are already done with the sync.
3437 * We ignore this to avoid flapping pdsk.
3438 * This should not happen, if the peer is a recent version of drbd. */
3439 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3440 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3441 real_peer_disk = D_UP_TO_DATE;
3442
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003443 if (ns.conn == C_WF_REPORT_PARAMS)
3444 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003445
Philipp Reisner67531712010-10-27 12:21:30 +02003446 if (peer_state.conn == C_AHEAD)
3447 ns.conn = C_BEHIND;
3448
Philipp Reisnerb411b362009-09-25 16:07:19 -07003449 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3450 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3451 int cr; /* consider resync */
3452
3453 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003454 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455 /* if we had an established connection
3456 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003457 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003459 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460 /* if we have both been inconsistent, and the peer has been
3461 * forced to be UpToDate with --overwrite-data */
3462 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3463 /* if we had been plain connected, and the admin requested to
3464 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003465 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003466 (peer_state.conn >= C_STARTING_SYNC_S &&
3467 peer_state.conn <= C_WF_BITMAP_T));
3468
3469 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003470 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471
3472 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003473 if (ns.conn == C_MASK) {
3474 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003475 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003476 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003477 } else if (peer_state.disk == D_NEGOTIATING) {
3478 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3479 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003480 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003482 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003483 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003484 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003485 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003486 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003487 }
3488 }
3489 }
3490
Philipp Reisner87eeee42011-01-19 14:16:30 +01003491 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003492 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493 goto retry;
3494 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003495 ns.peer = peer_state.role;
3496 ns.pdsk = real_peer_disk;
3497 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003498 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003499 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003500 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3501 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003502 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003503 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003504 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003505 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003506 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003507 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003508 drbd_uuid_new_current(mdev);
3509 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003510 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003511 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003512 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003513 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003515 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516
3517 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003518 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003519 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003520 }
3521
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003522 if (os.conn > C_WF_REPORT_PARAMS) {
3523 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003524 peer_state.disk != D_NEGOTIATING ) {
3525 /* we want resync, peer has not yet decided to sync... */
3526 /* Nowadays only used when forcing a node into primary role and
3527 setting its disk to UpToDate with that */
3528 drbd_send_uuids(mdev);
3529 drbd_send_state(mdev);
3530 }
3531 }
3532
Philipp Reisner89e58e72011-01-19 13:12:45 +01003533 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003534
3535 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3536
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003537 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538}
3539
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003540static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3541 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003543 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003544
3545 wait_event(mdev->misc_wait,
3546 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003547 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548 mdev->state.conn < C_CONNECTED ||
3549 mdev->state.disk < D_NEGOTIATING);
3550
3551 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3552
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553 /* Here the _drbd_uuid_ functions are right, current should
3554 _not_ be rotated into the history */
3555 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3556 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3557 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3558
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003559 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003560 drbd_start_resync(mdev, C_SYNC_TARGET);
3561
3562 put_ldev(mdev);
3563 } else
3564 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3565
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003566 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567}
3568
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003569/**
3570 * receive_bitmap_plain
3571 *
3572 * Return 0 when done, 1 when another iteration is needed, and a negative error
3573 * code upon failure.
3574 */
3575static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003576receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3577 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003578{
3579 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3580 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003581 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582
Philipp Reisner02918be2010-08-20 14:35:10 +02003583 if (want != data_size) {
3584 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003585 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586 }
3587 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003588 return 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003589 err = drbd_recv_all(mdev->tconn, buffer, want);
3590 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003591 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003592
3593 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3594
3595 c->word_offset += num_words;
3596 c->bit_offset = c->word_offset * BITS_PER_LONG;
3597 if (c->bit_offset > c->bm_bits)
3598 c->bit_offset = c->bm_bits;
3599
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003600 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003601}
3602
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003603static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
3604{
3605 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
3606}
3607
3608static int dcbp_get_start(struct p_compressed_bm *p)
3609{
3610 return (p->encoding & 0x80) != 0;
3611}
3612
3613static int dcbp_get_pad_bits(struct p_compressed_bm *p)
3614{
3615 return (p->encoding >> 4) & 0x7;
3616}
3617
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003618/**
3619 * recv_bm_rle_bits
3620 *
3621 * Return 0 when done, 1 when another iteration is needed, and a negative error
3622 * code upon failure.
3623 */
3624static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003625recv_bm_rle_bits(struct drbd_conf *mdev,
3626 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003627 struct bm_xfer_ctx *c,
3628 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629{
3630 struct bitstream bs;
3631 u64 look_ahead;
3632 u64 rl;
3633 u64 tmp;
3634 unsigned long s = c->bit_offset;
3635 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003636 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637 int have;
3638 int bits;
3639
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003640 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641
3642 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3643 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003644 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645
3646 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3647 bits = vli_decode_bits(&rl, look_ahead);
3648 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003649 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650
3651 if (toggle) {
3652 e = s + rl -1;
3653 if (e >= c->bm_bits) {
3654 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003655 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003656 }
3657 _drbd_bm_set_bits(mdev, s, e);
3658 }
3659
3660 if (have < bits) {
3661 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3662 have, bits, look_ahead,
3663 (unsigned int)(bs.cur.b - p->code),
3664 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003665 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666 }
3667 look_ahead >>= bits;
3668 have -= bits;
3669
3670 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3671 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003672 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673 look_ahead |= tmp << have;
3674 have += bits;
3675 }
3676
3677 c->bit_offset = s;
3678 bm_xfer_ctx_bit_to_word_offset(c);
3679
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003680 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681}
3682
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003683/**
3684 * decode_bitmap_c
3685 *
3686 * Return 0 when done, 1 when another iteration is needed, and a negative error
3687 * code upon failure.
3688 */
3689static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690decode_bitmap_c(struct drbd_conf *mdev,
3691 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003692 struct bm_xfer_ctx *c,
3693 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01003695 if (dcbp_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003696 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003697
3698 /* other variants had been implemented for evaluation,
3699 * but have been dropped as this one turned out to be "best"
3700 * during all our tests. */
3701
3702 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003703 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003704 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705}
3706
3707void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3708 const char *direction, struct bm_xfer_ctx *c)
3709{
3710 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003711 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3713 + c->bm_words * sizeof(long);
3714 unsigned total = c->bytes[0] + c->bytes[1];
3715 unsigned r;
3716
3717 /* total can not be zero. but just in case: */
3718 if (total == 0)
3719 return;
3720
3721 /* don't report if not compressed */
3722 if (total >= plain)
3723 return;
3724
3725 /* total < plain. check for overflow, still */
3726 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3727 : (1000 * total / plain);
3728
3729 if (r > 1000)
3730 r = 1000;
3731
3732 r = 1000 - r;
3733 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3734 "total %u; compression: %u.%u%%\n",
3735 direction,
3736 c->bytes[1], c->packets[1],
3737 c->bytes[0], c->packets[0],
3738 total, r/10, r % 10);
3739}
3740
3741/* Since we are processing the bitfield from lower addresses to higher,
3742 it does not matter if the process it in 32 bit chunks or 64 bit
3743 chunks as long as it is little endian. (Understand it as byte stream,
3744 beginning with the lowest byte...) If we would use big endian
3745 we would need to process it from the highest address to the lowest,
3746 in order to be agnostic to the 32 vs 64 bits issue.
3747
3748 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003749static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3750 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003751{
3752 struct bm_xfer_ctx c;
3753 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003754 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003755 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003756 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003758 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3759 /* you are supposed to send additional out-of-sync information
3760 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003761
3762 /* maybe we should use some per thread scratch page,
3763 * and allocate that during initial device creation? */
3764 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3765 if (!buffer) {
3766 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003767 err = -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768 goto out;
3769 }
3770
3771 c = (struct bm_xfer_ctx) {
3772 .bm_bits = drbd_bm_bits(mdev),
3773 .bm_words = drbd_bm_words(mdev),
3774 };
3775
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003776 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003777 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003778 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003779 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780 /* MAYBE: sanity check that we speak proto >= 90,
3781 * and the feature is enabled! */
3782 struct p_compressed_bm *p;
3783
Philipp Reisner02918be2010-08-20 14:35:10 +02003784 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003785 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003786 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787 goto out;
3788 }
3789 /* use the page buff */
3790 p = buffer;
3791 memcpy(p, h, sizeof(*h));
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003792 err = drbd_recv_all(mdev->tconn, p->head.payload, data_size);
3793 if (err)
3794 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003795 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3796 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003797 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003798 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003799 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003800 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003801 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003802 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003803 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 goto out;
3805 }
3806
Philipp Reisner02918be2010-08-20 14:35:10 +02003807 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003808 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003810 if (err <= 0) {
3811 if (err < 0)
3812 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003813 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003814 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003815 err = drbd_recv_header(mdev->tconn, &pi);
3816 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003817 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003818 cmd = pi.cmd;
3819 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003820 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821
3822 INFO_bm_xfer_stats(mdev, "receive", &c);
3823
3824 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003825 enum drbd_state_rv rv;
3826
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003827 err = drbd_send_bitmap(mdev);
3828 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 goto out;
3830 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003831 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3832 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3834 /* admin may have requested C_DISCONNECTING,
3835 * other threads may have noticed network errors */
3836 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3837 drbd_conn_str(mdev->state.conn));
3838 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003839 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003840
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003842 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003843 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844 drbd_start_resync(mdev, C_SYNC_SOURCE);
3845 free_page((unsigned long) buffer);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003846 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847}
3848
Philipp Reisner2de876e2011-03-15 14:38:01 +01003849static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850{
3851 /* TODO zero copy sink :) */
3852 static char sink[128];
3853 int size, want, r;
3854
Philipp Reisner02918be2010-08-20 14:35:10 +02003855 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003856 while (size > 0) {
3857 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003858 r = drbd_recv(tconn, sink, want);
3859 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003860 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003861 size -= r;
3862 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003863 return size ? -EIO : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003864}
3865
Philipp Reisner2de876e2011-03-15 14:38:01 +01003866static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3867 unsigned int data_size)
3868{
3869 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3870 cmd, data_size);
3871
3872 return _tconn_receive_skip(mdev->tconn, data_size);
3873}
3874
3875static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3876{
3877 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3878 cmd, data_size);
3879
3880 return _tconn_receive_skip(tconn, data_size);
3881}
3882
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003883static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3884 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003885{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886 /* Make sure we've acked all the TCP data associated
3887 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003888 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003889
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003890 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003891}
3892
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003893static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3894 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003895{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003896 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003897
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003898 switch (mdev->state.conn) {
3899 case C_WF_SYNC_UUID:
3900 case C_WF_BITMAP_T:
3901 case C_BEHIND:
3902 break;
3903 default:
3904 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3905 drbd_conn_str(mdev->state.conn));
3906 }
3907
Philipp Reisner73a01a12010-10-27 14:33:00 +02003908 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3909
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003910 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003911}
3912
Philipp Reisner02918be2010-08-20 14:35:10 +02003913struct data_cmd {
3914 int expect_payload;
3915 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003916 enum mdev_or_conn fa_type; /* first argument's type */
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003917 union {
3918 int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
3919 unsigned int to_receive);
3920 int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
3921 unsigned int to_receive);
3922 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923};
3924
Philipp Reisner02918be2010-08-20 14:35:10 +02003925static struct data_cmd drbd_cmd_handler[] = {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003926 [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
3927 [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
3928 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
3929 [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
3930 [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3931 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3932 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
3933 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3934 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3935 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3936 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
Philipp Reisner72046242011-03-15 18:51:47 +01003937 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } },
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003938 [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
3939 [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
3940 [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } },
3941 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
3942 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
3943 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3944 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3945 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3946 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
3947 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003948 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } },
Philipp Reisner02918be2010-08-20 14:35:10 +02003949};
3950
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it must not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003957
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003958static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003959{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003960 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003961 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003962 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003963 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003964
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003965 while (get_t_state(&tconn->receiver) == RUNNING) {
3966 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01003967 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003968 goto err_out;
3969
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003970 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003971 !drbd_cmd_handler[pi.cmd].mdev_fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003972 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003973 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003974 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003975
Philipp Reisner77351055b2011-02-07 17:24:26 +01003976 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3977 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003978 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003979 goto err_out;
3980 }
3981
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003982 if (shs) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01003983 err = drbd_recv_all_warn(tconn, &header->payload, shs);
3984 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003985 goto err_out;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003986 }
3987
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003988 if (drbd_cmd_handler[pi.cmd].fa_type == CONN) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003989 err = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003990 } else {
3991 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003992 err = mdev ?
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003993 drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
3994 tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
3995 }
Philipp Reisner02918be2010-08-20 14:35:10 +02003996
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003997 if (unlikely(err)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003998 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003999 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004000 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004001 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004003 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004004
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004005 err_out:
4006 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007}
4008
Philipp Reisner0e29d162011-02-18 14:23:11 +01004009void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004010{
4011 struct drbd_wq_barrier barr;
4012
4013 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004014 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004015 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004016 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 wait_for_completion(&barr.done);
4018}
4019
Philipp Reisner360cc742011-02-08 14:29:53 +01004020static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004022 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004023 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004025 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004026 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027
4028 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004029 drbd_thread_stop(&tconn->asender);
4030 drbd_free_sock(tconn);
4031
4032 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
4033
4034 conn_info(tconn, "Connection closed\n");
4035
4036 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004037 oc = tconn->cstate;
4038 if (oc >= C_UNCONNECTED)
4039 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4040
Philipp Reisner360cc742011-02-08 14:29:53 +01004041 spin_unlock_irq(&tconn->req_lock);
4042
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004043 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01004044 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
4045
4046 crypto_free_hash(tconn->cram_hmac_tfm);
4047 tconn->cram_hmac_tfm = NULL;
4048
4049 kfree(tconn->net_conf);
4050 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004051 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004052 }
4053}
4054
4055static int drbd_disconnected(int vnr, void *p, void *data)
4056{
4057 struct drbd_conf *mdev = (struct drbd_conf *)p;
4058 enum drbd_fencing_p fp;
4059 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060
Philipp Reisner85719572010-07-21 10:20:17 +02004061 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004062 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004063 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4064 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4065 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004066 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067
4068 /* We do not have data structures that would allow us to
4069 * get the rs_pending_cnt down to 0 again.
4070 * * On C_SYNC_TARGET we do not have any data structures describing
4071 * the pending RSDataRequest's we have sent.
4072 * * On C_SYNC_SOURCE there is no data structure that tracks
4073 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4074 * And no, it is not the sum of the reference counts in the
4075 * resync_LRU. The resync_LRU tracks the whole operation including
4076 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4077 * on the fly. */
4078 drbd_rs_cancel_all(mdev);
4079 mdev->rs_total = 0;
4080 mdev->rs_failed = 0;
4081 atomic_set(&mdev->rs_pending_cnt, 0);
4082 wake_up(&mdev->misc_wait);
4083
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004084 del_timer(&mdev->request_timer);
4085
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004087 resync_timer_fn((unsigned long)mdev);
4088
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4090 * w_make_resync_request etc. which may still be on the worker queue
4091 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004092 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093
4094 /* This also does reclaim_net_ee(). If we do this too early, we might
4095 * miss some resync ee and pages.*/
4096 drbd_process_done_ee(mdev);
4097
4098 kfree(mdev->p_uuid);
4099 mdev->p_uuid = NULL;
4100
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004101 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004102 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103
Philipp Reisnerb411b362009-09-25 16:07:19 -07004104 drbd_md_sync(mdev);
4105
4106 fp = FP_DONT_CARE;
4107 if (get_ldev(mdev)) {
4108 fp = mdev->ldev->dc.fencing;
4109 put_ldev(mdev);
4110 }
4111
Philipp Reisner87f7be42010-06-11 13:56:33 +02004112 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
4113 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004115 /* serialize with bitmap writeout triggered by the state change,
4116 * if any. */
4117 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4118
Philipp Reisnerb411b362009-09-25 16:07:19 -07004119 /* tcp_close and release of sendpage pages can be deferred. I don't
4120 * want to use SO_LINGER, because apparently it can be deferred for
4121 * more than 20 seconds (longest time I checked).
4122 *
4123 * Actually we don't care for exactly when the network stack does its
4124 * put_page(), but release our reference on these pages right here.
4125 */
4126 i = drbd_release_ee(mdev, &mdev->net_ee);
4127 if (i)
4128 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004129 i = atomic_read(&mdev->pp_in_use_by_net);
4130 if (i)
4131 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004132 i = atomic_read(&mdev->pp_in_use);
4133 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004134 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135
4136 D_ASSERT(list_empty(&mdev->read_ee));
4137 D_ASSERT(list_empty(&mdev->active_ee));
4138 D_ASSERT(list_empty(&mdev->sync_ee));
4139 D_ASSERT(list_empty(&mdev->done_ee));
4140
4141 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4142 atomic_set(&mdev->current_epoch->epoch_size, 0);
4143 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004144
4145 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004146}
4147
4148/*
4149 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4150 * we can agree on is stored in agreed_pro_version.
4151 *
4152 * feature flags and the reserved array should be enough room for future
4153 * enhancements of the handshake protocol, and possible plugins...
4154 *
4155 * for now, they are expected to be zero, but ignored.
4156 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004157static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004159 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004160 struct p_handshake *p = &tconn->data.sbuf.handshake;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004161 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004163 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4164 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004165 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166 }
4167
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004168 if (tconn->data.socket == NULL) {
4169 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004170 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004171 }
4172
4173 memset(p, 0, sizeof(*p));
4174 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4175 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004176 err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004177 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004178 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004179 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180}
4181
4182/*
4183 * return values:
4184 * 1 yes, we have a valid connection
4185 * 0 oops, did not work out, please try again
4186 * -1 peer talks different language,
4187 * no point in trying again, please go standalone.
4188 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004189static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004190{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004191 /* ASSERT current == tconn->receiver ... */
4192 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004193 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004194 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004195 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004196
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004197 err = drbd_send_handshake(tconn);
4198 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004199 return 0;
4200
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004201 err = drbd_recv_header(tconn, &pi);
4202 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203 return 0;
4204
Philipp Reisner77351055b2011-02-07 17:24:26 +01004205 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004206 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004207 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004208 return -1;
4209 }
4210
Philipp Reisner77351055b2011-02-07 17:24:26 +01004211 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004212 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004213 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004214 return -1;
4215 }
4216
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004217 err = drbd_recv_all_warn(tconn, &p->head.payload, expect);
4218 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004219 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004220
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 p->protocol_min = be32_to_cpu(p->protocol_min);
4222 p->protocol_max = be32_to_cpu(p->protocol_max);
4223 if (p->protocol_max == 0)
4224 p->protocol_max = p->protocol_min;
4225
4226 if (PRO_VERSION_MAX < p->protocol_min ||
4227 PRO_VERSION_MIN > p->protocol_max)
4228 goto incompat;
4229
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004230 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004232 conn_info(tconn, "Handshake successful: "
4233 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234
4235 return 1;
4236
4237 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004238 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004239 "I support %d-%d, peer supports %d-%d\n",
4240 PRO_VERSION_MIN, PRO_VERSION_MAX,
4241 p->protocol_min, p->protocol_max);
4242 return -1;
4243}
4244
4245#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stand-in for kernels built without CONFIG_CRYPTO_HMAC: peer
 * authentication cannot be performed, so it always fails.
 *
 * Returns -1 (auth failed, don't try again).
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	/* Log against the connection: there is no mdev in this function, so
	 * the device based dev_err(DEV, ...) cannot be used here. */
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4252#else
4253#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004254
4255/* Return value:
4256 1 - auth succeeded,
4257 0 - failed, try again (network error),
4258 -1 - auth failed, don't try again.
4259*/
4260
Philipp Reisner13e60372011-02-08 09:54:40 +01004261static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262{
4263 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4264 struct scatterlist sg;
4265 char *response = NULL;
4266 char *right_response = NULL;
4267 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004268 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269 unsigned int resp_size;
4270 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004271 struct packet_info pi;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004272 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004273
Philipp Reisner13e60372011-02-08 09:54:40 +01004274 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275 desc.flags = 0;
4276
Philipp Reisner13e60372011-02-08 09:54:40 +01004277 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4278 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004280 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004281 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004282 goto fail;
4283 }
4284
4285 get_random_bytes(my_challenge, CHALLENGE_LEN);
4286
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004287 rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288 if (!rv)
4289 goto fail;
4290
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004291 err = drbd_recv_header(tconn, &pi);
4292 if (err) {
4293 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004294 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004295 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296
Philipp Reisner77351055b2011-02-07 17:24:26 +01004297 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004298 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004299 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004300 rv = 0;
4301 goto fail;
4302 }
4303
Philipp Reisner77351055b2011-02-07 17:24:26 +01004304 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004305 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004306 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004307 goto fail;
4308 }
4309
Philipp Reisner77351055b2011-02-07 17:24:26 +01004310 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004311 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004312 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004313 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314 goto fail;
4315 }
4316
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004317 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4318 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319 rv = 0;
4320 goto fail;
4321 }
4322
Philipp Reisner13e60372011-02-08 09:54:40 +01004323 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004324 response = kmalloc(resp_size, GFP_NOIO);
4325 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004326 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004327 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328 goto fail;
4329 }
4330
4331 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004332 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333
4334 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4335 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004336 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004337 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338 goto fail;
4339 }
4340
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004341 rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342 if (!rv)
4343 goto fail;
4344
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004345 err = drbd_recv_header(tconn, &pi);
4346 if (err) {
4347 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004349 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004350
Philipp Reisner77351055b2011-02-07 17:24:26 +01004351 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004352 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004353 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 rv = 0;
4355 goto fail;
4356 }
4357
Philipp Reisner77351055b2011-02-07 17:24:26 +01004358 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004359 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 rv = 0;
4361 goto fail;
4362 }
4363
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004364 err = drbd_recv_all_warn(tconn, response , resp_size);
4365 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366 rv = 0;
4367 goto fail;
4368 }
4369
4370 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004371 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004372 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004373 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374 goto fail;
4375 }
4376
4377 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4378
4379 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4380 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004381 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004382 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383 goto fail;
4384 }
4385
4386 rv = !memcmp(response, right_response, resp_size);
4387
4388 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004389 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4390 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004391 else
4392 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004393
4394 fail:
4395 kfree(peers_ch);
4396 kfree(response);
4397 kfree(right_response);
4398
4399 return rv;
4400}
4401#endif
4402
4403int drbdd_init(struct drbd_thread *thi)
4404{
Philipp Reisner392c8802011-02-09 10:33:31 +01004405 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 int h;
4407
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004408 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409
4410 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004411 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004413 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004414 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415 }
4416 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004417 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004418 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 }
4420 } while (h == 0);
4421
4422 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004423 if (get_net_conf(tconn)) {
4424 drbdd(tconn);
4425 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426 }
4427 }
4428
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004429 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004430
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004431 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 return 0;
4433}
4434
4435/* ********* acknowledge sender ******** */
4436
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004437static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd)
4438{
4439 struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply;
4440 int retcode = be32_to_cpu(p->retcode);
4441
4442 if (retcode >= SS_SUCCESS) {
4443 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4444 } else {
4445 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4446 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4447 drbd_set_st_err_str(retcode), retcode);
4448 }
4449 wake_up(&tconn->ping_wait);
4450
4451 return true;
4452}
4453
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004454static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004456 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004457 int retcode = be32_to_cpu(p->retcode);
4458
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004459 if (retcode >= SS_SUCCESS) {
4460 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4461 } else {
4462 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4463 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4464 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004465 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004466 wake_up(&mdev->state_wait);
4467
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004468 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004469}
4470
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004471static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004472{
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004473 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004474
4475}
4476
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004477static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478{
4479 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004480 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4481 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4482 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004484 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004485}
4486
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004487static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004488{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004489 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004490 sector_t sector = be64_to_cpu(p->sector);
4491 int blksize = be32_to_cpu(p->blksize);
4492
Philipp Reisner31890f42011-01-19 14:12:51 +01004493 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494
4495 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4496
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004497 if (get_ldev(mdev)) {
4498 drbd_rs_complete_io(mdev, sector);
4499 drbd_set_in_sync(mdev, sector, blksize);
4500 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4501 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4502 put_ldev(mdev);
4503 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004504 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004505 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004506
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004507 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004508}
4509
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004510static int
4511validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4512 struct rb_root *root, const char *func,
4513 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004514{
4515 struct drbd_request *req;
4516 struct bio_and_error m;
4517
Philipp Reisner87eeee42011-01-19 14:16:30 +01004518 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004519 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004520 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004521 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004522 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523 }
4524 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004525 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526
4527 if (m.bio)
4528 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004529 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004530}
4531
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004532static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004534 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004535 sector_t sector = be64_to_cpu(p->sector);
4536 int blksize = be32_to_cpu(p->blksize);
4537 enum drbd_req_event what;
4538
4539 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4540
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004541 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542 drbd_set_in_sync(mdev, sector, blksize);
4543 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004544 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004545 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004546 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004548 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004549 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004550 break;
4551 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004552 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004553 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004554 break;
4555 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004556 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004557 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004558 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004559 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004560 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004561 what = DISCARD_WRITE;
4562 break;
4563 case P_RETRY_WRITE:
4564 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4565 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004566 break;
4567 default:
4568 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004569 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004570 }
4571
4572 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004573 &mdev->write_requests, __func__,
4574 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575}
4576
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004577static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004579 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004581 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004582 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4583 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004584 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585
4586 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4587
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004588 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004589 dec_rs_pending(mdev);
4590 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004591 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004593
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004594 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004595 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004596 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004597 if (!found) {
4598 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4599 The master bio might already be completed, therefore the
4600 request is no longer in the collision hash. */
4601 /* In Protocol B we might already have got a P_RECV_ACK
4602 but then get a P_NEG_ACK afterwards. */
4603 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004604 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004605 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004606 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004607 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004608}
4609
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004610static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004611{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004612 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613 sector_t sector = be64_to_cpu(p->sector);
4614
4615 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004616
Philipp Reisnerb411b362009-09-25 16:07:19 -07004617 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4618 (unsigned long long)sector, be32_to_cpu(p->blksize));
4619
4620 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004621 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004622 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004623}
4624
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004625static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004626{
4627 sector_t sector;
4628 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004629 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630
4631 sector = be64_to_cpu(p->sector);
4632 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004633
4634 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4635
4636 dec_rs_pending(mdev);
4637
4638 if (get_ldev_if_state(mdev, D_FAILED)) {
4639 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004640 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004641 case P_NEG_RS_DREPLY:
4642 drbd_rs_failed_io(mdev, sector, size);
4643 case P_RS_CANCEL:
4644 break;
4645 default:
4646 D_ASSERT(0);
4647 put_ldev(mdev);
4648 return false;
4649 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650 put_ldev(mdev);
4651 }
4652
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004653 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654}
4655
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004656static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004658 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004659
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004660 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004661
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004662 if (mdev->state.conn == C_AHEAD &&
4663 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004664 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4665 mdev->start_resync_timer.expires = jiffies + HZ;
4666 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004667 }
4668
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004669 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004670}
4671
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004672static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004673{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004674 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004675 struct drbd_work *w;
4676 sector_t sector;
4677 int size;
4678
4679 sector = be64_to_cpu(p->sector);
4680 size = be32_to_cpu(p->blksize);
4681
4682 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4683
4684 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004685 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004687 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004689 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004690 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004691
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 drbd_rs_complete_io(mdev, sector);
4693 dec_rs_pending(mdev);
4694
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004695 --mdev->ov_left;
4696
4697 /* let's advance progress step marks only for every other megabyte */
4698 if ((mdev->ov_left & 0x200) == 0x200)
4699 drbd_advance_rs_marks(mdev, mdev->ov_left);
4700
4701 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702 w = kmalloc(sizeof(*w), GFP_NOIO);
4703 if (w) {
4704 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004705 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004706 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707 } else {
4708 dev_err(DEV, "kmalloc(w) failed.");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01004709 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004710 drbd_resync_finished(mdev);
4711 }
4712 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004713 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004714 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715}
4716
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004717static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004718{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004719 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004720}
4721
Philipp Reisner32862ec2011-02-08 16:41:01 +01004722static int tconn_process_done_ee(struct drbd_tconn *tconn)
4723{
Philipp Reisner082a3432011-03-15 16:05:42 +01004724 struct drbd_conf *mdev;
4725 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004726
4727 do {
4728 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4729 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004730 idr_for_each_entry(&tconn->volumes, mdev, i) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +01004731 if (drbd_process_done_ee(mdev))
Philipp Reisner082a3432011-03-15 16:05:42 +01004732 return 1; /* error */
4733 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004734 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004735
4736 spin_lock_irq(&tconn->req_lock);
4737 idr_for_each_entry(&tconn->volumes, mdev, i) {
4738 not_empty = !list_empty(&mdev->done_ee);
4739 if (not_empty)
4740 break;
4741 }
4742 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004743 } while (not_empty);
4744
4745 return 0;
4746}
4747
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004748struct asender_cmd {
4749 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004750 enum mdev_or_conn fa_type; /* first argument's type */
4751 union {
4752 int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd);
4753 int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd);
4754 };
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004755};
4756
4757static struct asender_cmd asender_tbl[] = {
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004758 [P_PING] = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } },
4759 [P_PING_ACK] = { sizeof(struct p_header), CONN, { .conn_fn = got_PingAck } },
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004760 [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4761 [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4762 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4763 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4764 [P_NEG_ACK] = { sizeof(struct p_block_ack), MDEV, { got_NegAck } },
4765 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } },
4766 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
4767 [P_OV_RESULT] = { sizeof(struct p_block_ack), MDEV, { got_OVResult } },
4768 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } },
4769 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
4770 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } },
4771 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } },
4772 [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004773 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}},
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004774 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004775};
4776
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777int drbd_asender(struct drbd_thread *thi)
4778{
Philipp Reisner392c8802011-02-09 10:33:31 +01004779 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004780 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004781 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004782 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004783 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004784 void *buf = h;
4785 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004786 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004787 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004788
Philipp Reisnerb411b362009-09-25 16:07:19 -07004789 current->policy = SCHED_RR; /* Make this a realtime task! */
4790 current->rt_priority = 2; /* more important than all other tasks */
4791
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004792 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004793 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004794 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004795 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004796 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004797 goto reconnect;
4798 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004799 tconn->meta.socket->sk->sk_rcvtimeo =
4800 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004801 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802 }
4803
Philipp Reisner32862ec2011-02-08 16:41:01 +01004804 /* TODO: conditionally cork; it may hurt latency if we cork without
4805 much to send */
4806 if (!tconn->net_conf->no_cork)
4807 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004808 if (tconn_process_done_ee(tconn)) {
4809 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004810 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004811 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004812 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004813 if (!tconn->net_conf->no_cork)
4814 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004815
4816 /* short circuit, recv_msg would return EINTR anyways. */
4817 if (signal_pending(current))
4818 continue;
4819
Philipp Reisner32862ec2011-02-08 16:41:01 +01004820 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4821 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004822
4823 flush_signals(current);
4824
4825 /* Note:
4826 * -EINTR (on meta) we got a signal
4827 * -EAGAIN (on meta) rcvtimeo expired
4828 * -ECONNRESET other side closed the connection
4829 * -ERESTARTSYS (on data) we got a signal
4830 * rv < 0 other than above: unexpected error!
4831 * rv == expected: full header or command
4832 * rv < expected: "woken" by signal during receive
4833 * rv == 0 : "connection shut down by peer"
4834 */
4835 if (likely(rv > 0)) {
4836 received += rv;
4837 buf += rv;
4838 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004839 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004840 goto reconnect;
4841 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004842 /* If the data socket received something meanwhile,
4843 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004844 if (time_after(tconn->last_received,
4845 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004846 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004847 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004848 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849 goto reconnect;
4850 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004851 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852 continue;
4853 } else if (rv == -EINTR) {
4854 continue;
4855 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004856 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004857 goto reconnect;
4858 }
4859
4860 if (received == expect && cmd == NULL) {
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01004861 if (decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004863 cmd = &asender_tbl[pi.cmd];
4864 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004865 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004866 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004867 goto disconnect;
4868 }
4869 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004870 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004871 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004872 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004873 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004874 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004875 }
4876 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004877 bool rv;
4878
4879 if (cmd->fa_type == CONN) {
4880 rv = cmd->conn_fn(tconn, pi.cmd);
4881 } else {
4882 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
4883 rv = cmd->mdev_fn(mdev, pi.cmd);
4884 }
4885
4886 if (!rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004887 goto reconnect;
4888
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004889 tconn->last_received = jiffies;
4890
Lars Ellenbergf36af182011-03-09 22:44:55 +01004891 /* the idle_timeout (ping-int)
4892 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004893 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004894 ping_timeout_active = 0;
4895
Philipp Reisnerb411b362009-09-25 16:07:19 -07004896 buf = h;
4897 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004898 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004899 cmd = NULL;
4900 }
4901 }
4902
4903 if (0) {
4904reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004905 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004906 }
4907 if (0) {
4908disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004909 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004910 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004911 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004912
Philipp Reisner32862ec2011-02-08 16:41:01 +01004913 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004914
4915 return 0;
4916}