blob: 5fc536727afefcc06bc85b7adfc3320058c6c985 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/* Result type of drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE = 0,
	FE_DESTROYED  = 1,
	FE_RECYCLED   = 2,
};
62
/* NOTE(review): presumably selects between a per-device (mdev) and a
 * per-connection context -- confirm against the users of this enum. */
enum mdev_or_conn {
	MDEV = 0,
	CONN = 1,
};
67
/* Forward declarations. */
static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev);
static int e_end_block(struct drbd_work *w, int unused);


/* Allocation flags used for the opportunistic alloc_page() calls below. */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
77
/*
 * Helper functions for singly linked page lists ("page chains"),
 * where page->private serves as the "next" pointer.
 */
82
83/* If at least n pages are linked at head, get n pages off.
84 * Otherwise, don't modify head, and return NULL.
85 * Locking is the responsibility of the caller.
86 */
87static struct page *page_chain_del(struct page **head, int n)
88{
89 struct page *page;
90 struct page *tmp;
91
92 BUG_ON(!n);
93 BUG_ON(!head);
94
95 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020096
97 if (!page)
98 return NULL;
99
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200100 while (page) {
101 tmp = page_chain_next(page);
102 if (--n == 0)
103 break; /* found sufficient pages */
104 if (tmp == NULL)
105 /* insufficient pages, don't use any of them. */
106 return NULL;
107 page = tmp;
108 }
109
110 /* add end of list marker for the returned list */
111 set_page_private(page, 0);
112 /* actual return value, and adjustment of head */
113 page = *head;
114 *head = tmp;
115 return page;
116}
117
/*
 * Find the last page of the chain starting at @page; if @len is not NULL,
 * the number of pages in the chain is stored there.
 *
 * May be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}

	if (len)
		*len = count;
	return page;
}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
143static void page_chain_add(struct page **head,
144 struct page *chain_first, struct page *chain_last)
145{
146#if 1
147 struct page *tmp;
148 tmp = page_chain_tail(chain_first, NULL);
149 BUG_ON(tmp != chain_last);
150#endif
151
152 /* add chain to head */
153 set_page_private(chain_last, (unsigned long)*head);
154 *head = chain_first;
155}
156
157static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700158{
159 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 struct page *tmp = NULL;
161 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162
163 /* Yes, testing drbd_pp_vacant outside the lock is racy.
164 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700166 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200167 page = page_chain_del(&drbd_pp_pool, number);
168 if (page)
169 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700170 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200171 if (page)
172 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700173 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200174
Philipp Reisnerb411b362009-09-25 16:07:19 -0700175 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
176 * "criss-cross" setup, that might cause write-out on some other DRBD,
177 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200178 for (i = 0; i < number; i++) {
179 tmp = alloc_page(GFP_TRY);
180 if (!tmp)
181 break;
182 set_page_private(tmp, (unsigned long)page);
183 page = tmp;
184 }
185
186 if (i == number)
187 return page;
188
189 /* Not enough pages immediately available this time.
190 * No need to jump around here, drbd_pp_alloc will retry this
191 * function "soon". */
192 if (page) {
193 tmp = page_chain_tail(page, NULL);
194 spin_lock(&drbd_pp_lock);
195 page_chain_add(&drbd_pp_pool, page, tmp);
196 drbd_pp_vacant += i;
197 spin_unlock(&drbd_pp_lock);
198 }
199 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200}
201
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
203{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100204 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 struct list_head *le, *tle;
206
207 /* The EEs are always appended to the end of the list. Since
208 they are sent in order over the wire, they have to finish
209 in order. As soon as we see the first not finished we can
210 stop to examine the list... */
211
212 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100213 peer_req = list_entry(le, struct drbd_peer_request, w.list);
214 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 break;
216 list_move(le, to_be_freed);
217 }
218}
219
220static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
221{
222 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224
Philipp Reisner87eeee42011-01-19 14:16:30 +0100225 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100227 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100229 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
230 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700231}
232
233/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700235 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200236 * @number: number of pages requested
237 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Tries to allocate number pages, first from our own page pool, then from
240 * the kernel, unless this allocation would exceed the max_buffers setting.
241 * Possibly retry until DRBD frees sufficient pages somewhere else.
242 *
243 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700244 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700246{
247 struct page *page = NULL;
248 DEFINE_WAIT(wait);
249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 /* Yes, we may run up to @number over max_buffers. If we
251 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100252 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200253 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700254
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700256 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
257
258 drbd_kick_lo_and_reclaim_net(mdev);
259
Philipp Reisner89e58e72011-01-19 13:12:45 +0100260 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200261 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700262 if (page)
263 break;
264 }
265
266 if (!retry)
267 break;
268
269 if (signal_pending(current)) {
270 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
271 break;
272 }
273
274 schedule();
275 }
276 finish_wait(&drbd_pp_wait, &wait);
277
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200278 if (page)
279 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280 return page;
281}
282
283/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100284 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200285 * Either links the page chain back to the global pool,
286 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200287static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700288{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200289 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700290 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200291
Philipp Reisner81a5d602011-02-22 19:53:16 -0500292 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200293 i = page_chain_free(page);
294 else {
295 struct page *tmp;
296 tmp = page_chain_tail(page, &i);
297 spin_lock(&drbd_pp_lock);
298 page_chain_add(&drbd_pp_pool, page, tmp);
299 drbd_pp_vacant += i;
300 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200302 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200303 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200304 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
305 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700306 wake_up(&drbd_pp_wait);
307}
308
/*
 * Locking rules for the functions below.
 *
 * You need to hold the req_lock when calling:
 *  _drbd_wait_ee_list_empty()
 *
 * You must not hold the req_lock when calling:
 *  drbd_free_ee()
 *  drbd_alloc_ee()
 *  drbd_init_ee()
 *  drbd_release_ee()
 *  drbd_ee_fix_bhs()
 *  drbd_process_done_ee()
 *  drbd_clear_done_ee()
 *  drbd_wait_ee_list_empty()
 */
323
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100324struct drbd_peer_request *
325drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
326 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100328 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700329 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200330 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700331
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100332 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700333 return NULL;
334
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100335 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
336 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700337 if (!(gfp_mask & __GFP_NOWARN))
338 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
339 return NULL;
340 }
341
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200342 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
343 if (!page)
344 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700345
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 drbd_clear_interval(&peer_req->i);
347 peer_req->i.size = data_size;
348 peer_req->i.sector = sector;
349 peer_req->i.local = false;
350 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100351
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100352 peer_req->epoch = NULL;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100353 peer_req->w.mdev = mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100354 peer_req->pages = page;
355 atomic_set(&peer_req->pending_bios, 0);
356 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100357 /*
358 * The block_id is opaque to the receiver. It is not endianness
359 * converted, and sent back to the sender unchanged.
360 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100361 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100363 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200365 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100366 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700367 return NULL;
368}
369
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100370void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100371 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 if (peer_req->flags & EE_HAS_DIGEST)
374 kfree(peer_req->digest);
375 drbd_pp_free(mdev, peer_req->pages, is_net);
376 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
377 D_ASSERT(drbd_interval_empty(&peer_req->i));
378 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379}
380
381int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
382{
383 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100384 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200386 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387
Philipp Reisner87eeee42011-01-19 14:16:30 +0100388 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100390 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
393 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700394 count++;
395 }
396 return count;
397}
398
399
Philipp Reisner32862ec2011-02-08 16:41:01 +0100400/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401 * and receive_Barrier.
402 *
403 * Move entries from net_ee to done_ee, if ready.
404 * Grab done_ee, call all callbacks, free the entries.
405 * The callbacks typically send out ACKs.
406 */
407static int drbd_process_done_ee(struct drbd_conf *mdev)
408{
409 LIST_HEAD(work_list);
410 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100411 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100412 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413
Philipp Reisner87eeee42011-01-19 14:16:30 +0100414 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700415 reclaim_net_ee(mdev, &reclaimed);
416 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100417 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100419 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
420 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700421
422 /* possible callbacks here:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100423 * e_end_block, and e_end_resync_block, e_send_discard_write.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700424 * all ignore the last argument.
425 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100426 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100427 int err2;
428
Philipp Reisnerb411b362009-09-25 16:07:19 -0700429 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100430 err2 = peer_req->w.cb(&peer_req->w, !!err);
431 if (!err)
432 err = err2;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100433 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700434 }
435 wake_up(&mdev->ee_wait);
436
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100437 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700438}
439
440void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
441{
442 DEFINE_WAIT(wait);
443
444 /* avoids spin_lock/unlock
445 * and calling prepare_to_wait in the fast path */
446 while (!list_empty(head)) {
447 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100448 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100449 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100451 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452 }
453}
454
455void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
456{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100457 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700458 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100459 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700460}
461
462/* see also kernel_accept; which is only present since 2.6.18.
463 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100464static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700465{
466 struct sock *sk = sock->sk;
467 int err = 0;
468
469 *what = "listen";
470 err = sock->ops->listen(sock, 5);
471 if (err < 0)
472 goto out;
473
474 *what = "sock_create_lite";
475 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
476 newsock);
477 if (err < 0)
478 goto out;
479
480 *what = "accept";
481 err = sock->ops->accept(sock, *newsock, 0);
482 if (err < 0) {
483 sock_release(*newsock);
484 *newsock = NULL;
485 goto out;
486 }
487 (*newsock)->ops = sock->ops;
488
489out:
490 return err;
491}
492
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100493static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700494{
495 mm_segment_t oldfs;
496 struct kvec iov = {
497 .iov_base = buf,
498 .iov_len = size,
499 };
500 struct msghdr msg = {
501 .msg_iovlen = 1,
502 .msg_iov = (struct iovec *)&iov,
503 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
504 };
505 int rv;
506
507 oldfs = get_fs();
508 set_fs(KERNEL_DS);
509 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
510 set_fs(oldfs);
511
512 return rv;
513}
514
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100515static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700516{
517 mm_segment_t oldfs;
518 struct kvec iov = {
519 .iov_base = buf,
520 .iov_len = size,
521 };
522 struct msghdr msg = {
523 .msg_iovlen = 1,
524 .msg_iov = (struct iovec *)&iov,
525 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
526 };
527 int rv;
528
529 oldfs = get_fs();
530 set_fs(KERNEL_DS);
531
532 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100533 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700534 if (rv == size)
535 break;
536
537 /* Note:
538 * ECONNRESET other side closed the connection
539 * ERESTARTSYS (on sock) we got a signal
540 */
541
542 if (rv < 0) {
543 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100544 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700545 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100546 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700547 break;
548 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100549 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700550 break;
551 } else {
552 /* signal came in, or peer/link went down,
553 * after we read a partial message
554 */
555 /* D_ASSERT(signal_pending(current)); */
556 break;
557 }
558 };
559
560 set_fs(oldfs);
561
562 if (rv != size)
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100563 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700564
565 return rv;
566}
567
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100568static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
569{
570 int err;
571
572 err = drbd_recv(tconn, buf, size);
573 if (err != size) {
574 if (err >= 0)
575 err = -EIO;
576 } else
577 err = 0;
578 return err;
579}
580
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200581/* quoting tcp(7):
582 * On individual connections, the socket buffer size must be set prior to the
583 * listen(2) or connect(2) calls in order to have it take effect.
584 * This is our wrapper to do so.
585 */
586static void drbd_setbufsize(struct socket *sock, unsigned int snd,
587 unsigned int rcv)
588{
589 /* open coded SO_SNDBUF, SO_RCVBUF */
590 if (snd) {
591 sock->sk->sk_sndbuf = snd;
592 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
593 }
594 if (rcv) {
595 sock->sk->sk_rcvbuf = rcv;
596 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
597 }
598}
599
/*
 * Actively try to establish a TCP connection to the configured peer address.
 *
 * A socket is created, bound to the configured local address (with the port
 * forced to 0), and connected to the peer.
 *
 * Returns the connected socket, or NULL if there is no network configuration
 * or any step failed.  Failures before the connect attempt that are not on
 * the "transient" list below request C_DISCONNECTING; a failing connect never
 * does, so that we stay in C_WF_CONNECTION and can try again.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int my_addr_len;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	/* Clamp the length once and use the clamped value for both the copy
	 * and the bind below: src_in6 lives on our stack, so bind must never
	 * be told to read more bytes than we actually own. */
	my_addr_len = min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, tconn->net_conf->my_addr, my_addr_len);
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
677
Philipp Reisner76536202011-02-07 14:09:54 +0100678static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700679{
680 int timeo, err;
681 struct socket *s_estab = NULL, *s_listen;
682 const char *what;
683
Philipp Reisner76536202011-02-07 14:09:54 +0100684 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700685 return NULL;
686
687 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100688 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689 SOCK_STREAM, IPPROTO_TCP, &s_listen);
690 if (err) {
691 s_listen = NULL;
692 goto out;
693 }
694
Philipp Reisner76536202011-02-07 14:09:54 +0100695 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700696 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
697
698 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
699 s_listen->sk->sk_rcvtimeo = timeo;
700 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100701 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
702 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700703
704 what = "bind before listen";
705 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100706 (struct sockaddr *) tconn->net_conf->my_addr,
707 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708 if (err < 0)
709 goto out;
710
Philipp Reisner76536202011-02-07 14:09:54 +0100711 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700712
713out:
714 if (s_listen)
715 sock_release(s_listen);
716 if (err < 0) {
717 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100718 conn_err(tconn, "%s failed, err = %d\n", what, err);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100719 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 }
721 }
Philipp Reisner76536202011-02-07 14:09:54 +0100722 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700723
724 return s_estab;
725}
726
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100727static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700728{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100729 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730
Andreas Gruenbacherecf23632011-03-15 23:48:25 +0100731 return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700732}
733
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100734static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700735{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100736 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700737 int rr;
738
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100739 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700740
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100741 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742 return be16_to_cpu(h->command);
743
744 return 0xffff;
745}
746
747/**
748 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700749 * @sock: pointer to the pointer to the socket.
750 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100751static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700752{
753 int rr;
754 char tb[4];
755
756 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100757 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700758
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100759 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700760
761 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100762 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700763 } else {
764 sock_release(*sock);
765 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100766 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700767 }
768}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100769/* Gets called if a connection is established, or if a new minor gets created
770 in a connection */
771int drbd_connected(int vnr, void *p, void *data)
Philipp Reisner907599e2011-02-08 11:25:37 +0100772{
773 struct drbd_conf *mdev = (struct drbd_conf *)p;
774 int ok = 1;
775
776 atomic_set(&mdev->packet_seq, 0);
777 mdev->peer_seq = 0;
778
Philipp Reisner8410da82011-02-11 20:11:10 +0100779 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
780 &mdev->tconn->cstate_mutex :
781 &mdev->own_state_mutex;
782
Andreas Gruenbacher103ea272011-03-16 00:43:02 +0100783 ok &= !drbd_send_sync_param(mdev);
Andreas Gruenbacherf02d4d02011-03-16 01:12:50 +0100784 ok &= !drbd_send_sizes(mdev, 0, 0);
Andreas Gruenbacher2ae5f952011-03-16 01:07:20 +0100785 ok &= !drbd_send_uuids(mdev);
Andreas Gruenbacher927036f2011-03-16 00:50:00 +0100786 ok &= !drbd_send_state(mdev);
Philipp Reisner907599e2011-02-08 11:25:37 +0100787 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
788 clear_bit(RESIZE_PENDING, &mdev->flags);
789
Philipp Reisner8410da82011-02-11 20:11:10 +0100790
Philipp Reisner907599e2011-02-08 11:25:37 +0100791 return !ok;
792}
793
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794/*
795 * return values:
796 * 1 yes, we have a valid connection
797 * 0 oops, did not work out, please try again
798 * -1 peer talks different language,
799 * no point in trying again, please go standalone.
800 * -2 We do not have a network config...
801 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100802static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700803{
804 struct socket *s, *sock, *msock;
805 int try, h, ok;
806
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100807 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808 return -2;
809
Philipp Reisner907599e2011-02-08 11:25:37 +0100810 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
811 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100812 /* agreed_pro_version must be smaller than 100 so we send the old
813 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814
815 sock = NULL;
816 msock = NULL;
817
818 do {
819 for (try = 0;;) {
820 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100821 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700822 if (s || ++try >= 3)
823 break;
824 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100825 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700826 }
827
828 if (s) {
829 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100830 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700831 sock = s;
832 s = NULL;
833 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100834 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700835 msock = s;
836 s = NULL;
837 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100838 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700839 goto out_release_sockets;
840 }
841 }
842
843 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100844 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100845 ok = drbd_socket_okay(&sock);
846 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700847 if (ok)
848 break;
849 }
850
851retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100852 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700853 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100854 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100855 drbd_socket_okay(&sock);
856 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700857 switch (try) {
858 case P_HAND_SHAKE_S:
859 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100860 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700861 sock_release(sock);
862 }
863 sock = s;
864 break;
865 case P_HAND_SHAKE_M:
866 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100867 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700868 sock_release(msock);
869 }
870 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100871 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700872 break;
873 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100874 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700875 sock_release(s);
876 if (random32() & 1)
877 goto retry;
878 }
879 }
880
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100881 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700882 goto out_release_sockets;
883 if (signal_pending(current)) {
884 flush_signals(current);
885 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100886 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700887 goto out_release_sockets;
888 }
889
890 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100891 ok = drbd_socket_okay(&sock);
892 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893 if (ok)
894 break;
895 }
896 } while (1);
897
898 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
899 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
900
901 sock->sk->sk_allocation = GFP_NOIO;
902 msock->sk->sk_allocation = GFP_NOIO;
903
904 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
905 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
906
Philipp Reisnerb411b362009-09-25 16:07:19 -0700907 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100908 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
910 * first set it to the P_HAND_SHAKE timeout,
911 * which we set to 4x the configured ping_timeout. */
912 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100913 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700914
Philipp Reisner907599e2011-02-08 11:25:37 +0100915 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
916 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917
918 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300919 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 drbd_tcp_nodelay(sock);
921 drbd_tcp_nodelay(msock);
922
Philipp Reisner907599e2011-02-08 11:25:37 +0100923 tconn->data.socket = sock;
924 tconn->meta.socket = msock;
925 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700926
Philipp Reisner907599e2011-02-08 11:25:37 +0100927 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700928 if (h <= 0)
929 return h;
930
Philipp Reisner907599e2011-02-08 11:25:37 +0100931 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100933 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100934 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100935 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100937 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100938 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100939 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700940 }
941 }
942
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100943 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700944 return 0;
945
Philipp Reisner907599e2011-02-08 11:25:37 +0100946 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700947 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
948
Philipp Reisner907599e2011-02-08 11:25:37 +0100949 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700950
Andreas Gruenbacher387eb302011-03-16 01:05:37 +0100951 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200952 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700953
Philipp Reisner907599e2011-02-08 11:25:37 +0100954 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955
956out_release_sockets:
957 if (sock)
958 sock_release(sock);
959 if (msock)
960 sock_release(msock);
961 return -1;
962}
963
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100964static int decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700965{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100966 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100967 pi->cmd = be16_to_cpu(h->h80.command);
968 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100969 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100970 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100971 pi->cmd = be16_to_cpu(h->h95.command);
972 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
973 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200974 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100975 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200976 be32_to_cpu(h->h80.magic),
977 be16_to_cpu(h->h80.command),
978 be16_to_cpu(h->h80.length));
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100979 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980 }
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +0100981 return 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100982}
983
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100984static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100985{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100986 struct p_header *h = &tconn->data.rbuf.header;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100987 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100988
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100989 err = drbd_recv(tconn, h, sizeof(*h));
990 if (unlikely(err != sizeof(*h))) {
Philipp Reisner257d0af2011-01-26 12:15:29 +0100991 if (!signal_pending(current))
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100992 conn_warn(tconn, "short read expecting header on sock: r=%d\n", err);
993 if (err >= 0)
994 err = -EIO;
995 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100996 }
997
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +0100998 err = decode_header(tconn, h, pi);
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100999 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001000
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001001 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001002}
1003
Philipp Reisner2451fc32010-08-24 13:43:11 +02001004static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005{
1006 int rv;
1007
1008 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +04001009 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +02001010 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001011 if (rv) {
1012 dev_err(DEV, "local disk flush failed with status %d\n", rv);
1013 /* would rather check on EOPNOTSUPP, but that is not reliable.
1014 * don't try again for ANY return value != 0
1015 * if (rv == -EOPNOTSUPP) */
1016 drbd_bump_write_ordering(mdev, WO_drain_io);
1017 }
1018 put_ldev(mdev);
1019 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001020}
1021
1022/**
1023 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1024 * @mdev: DRBD device.
1025 * @epoch: Epoch object.
1026 * @ev: Epoch event.
1027 */
1028static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1029 struct drbd_epoch *epoch,
1030 enum epoch_event ev)
1031{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001032 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034 enum finish_epoch rv = FE_STILL_LIVE;
1035
1036 spin_lock(&mdev->epoch_lock);
1037 do {
1038 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001039
1040 epoch_size = atomic_read(&epoch->epoch_size);
1041
1042 switch (ev & ~EV_CLEANUP) {
1043 case EV_PUT:
1044 atomic_dec(&epoch->active);
1045 break;
1046 case EV_GOT_BARRIER_NR:
1047 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048 break;
1049 case EV_BECAME_LAST:
1050 /* nothing to do*/
1051 break;
1052 }
1053
Philipp Reisnerb411b362009-09-25 16:07:19 -07001054 if (epoch_size != 0 &&
1055 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001056 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001057 if (!(ev & EV_CLEANUP)) {
1058 spin_unlock(&mdev->epoch_lock);
1059 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1060 spin_lock(&mdev->epoch_lock);
1061 }
1062 dec_unacked(mdev);
1063
1064 if (mdev->current_epoch != epoch) {
1065 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1066 list_del(&epoch->list);
1067 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1068 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001069 kfree(epoch);
1070
1071 if (rv == FE_STILL_LIVE)
1072 rv = FE_DESTROYED;
1073 } else {
1074 epoch->flags = 0;
1075 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001076 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001077 if (rv == FE_STILL_LIVE)
1078 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001079 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080 }
1081 }
1082
1083 if (!next_epoch)
1084 break;
1085
1086 epoch = next_epoch;
1087 } while (1);
1088
1089 spin_unlock(&mdev->epoch_lock);
1090
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 return rv;
1092}
1093
1094/**
1095 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1096 * @mdev: DRBD device.
1097 * @wo: Write ordering method to try.
1098 */
1099void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1100{
1101 enum write_ordering_e pwo;
1102 static char *write_ordering_str[] = {
1103 [WO_none] = "none",
1104 [WO_drain_io] = "drain",
1105 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106 };
1107
1108 pwo = mdev->write_ordering;
1109 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001110 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1111 wo = WO_drain_io;
1112 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1113 wo = WO_none;
1114 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001115 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1117}
1118
1119/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001120 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001121 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001122 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001123 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001124 *
1125 * May spread the pages to multiple bios,
1126 * depending on bio_add_page restrictions.
1127 *
1128 * Returns 0 if all bios have been submitted,
1129 * -ENOMEM if we could not allocate enough bios,
1130 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1131 * single page to an empty bio (which should never happen and likely indicates
1132 * that the lower level IO stack is in some way broken). This has been observed
1133 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001134 */
1135/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001136int drbd_submit_peer_request(struct drbd_conf *mdev,
1137 struct drbd_peer_request *peer_req,
1138 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001139{
1140 struct bio *bios = NULL;
1141 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001142 struct page *page = peer_req->pages;
1143 sector_t sector = peer_req->i.sector;
1144 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001145 unsigned n_bios = 0;
1146 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001147 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001148
1149 /* In most cases, we will only need one bio. But in case the lower
1150 * level restrictions happen to be different at this offset on this
1151 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001152 * request in more than one bio.
1153 *
1154 * Plain bio_alloc is good enough here, this is no DRBD internally
1155 * generated bio, but a bio allocated on behalf of the peer.
1156 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001157next_bio:
1158 bio = bio_alloc(GFP_NOIO, nr_pages);
1159 if (!bio) {
1160 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1161 goto fail;
1162 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001163 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001164 bio->bi_sector = sector;
1165 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001166 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001167 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001168 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001169
1170 bio->bi_next = bios;
1171 bios = bio;
1172 ++n_bios;
1173
1174 page_chain_for_each(page) {
1175 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1176 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001177 /* A single page must always be possible!
1178 * But in case it fails anyways,
1179 * we deal with it, and complain (below). */
1180 if (bio->bi_vcnt == 0) {
1181 dev_err(DEV,
1182 "bio_add_page failed for len=%u, "
1183 "bi_vcnt=0 (bi_sector=%llu)\n",
1184 len, (unsigned long long)bio->bi_sector);
1185 err = -ENOSPC;
1186 goto fail;
1187 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001188 goto next_bio;
1189 }
1190 ds -= len;
1191 sector += len >> 9;
1192 --nr_pages;
1193 }
1194 D_ASSERT(page == NULL);
1195 D_ASSERT(ds == 0);
1196
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001197 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001198 do {
1199 bio = bios;
1200 bios = bios->bi_next;
1201 bio->bi_next = NULL;
1202
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001203 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001204 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001205 return 0;
1206
1207fail:
1208 while (bios) {
1209 bio = bios;
1210 bios = bios->bi_next;
1211 bio_put(bio);
1212 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001213 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001214}
1215
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001216static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001217 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001218{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001219 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001220
1221 drbd_remove_interval(&mdev->write_requests, i);
1222 drbd_clear_interval(i);
1223
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001224 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001225 if (i->waiting)
1226 wake_up(&mdev->misc_wait);
1227}
1228
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001229static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1230 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001232 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001233 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234 struct drbd_epoch *epoch;
1235
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 inc_unacked(mdev);
1237
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 mdev->current_epoch->barrier_nr = p->barrier;
1239 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1240
1241 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1242 * the activity log, which means it would not be resynced in case the
1243 * R_PRIMARY crashes now.
1244 * Therefore we must send the barrier_ack after the barrier request was
1245 * completed. */
1246 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001247 case WO_none:
1248 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001249 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001250
1251 /* receiver context, in the writeout path of the other node.
1252 * avoid potential distributed deadlock */
1253 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1254 if (epoch)
1255 break;
1256 else
1257 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1258 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259
1260 case WO_bdev_flush:
1261 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001263 drbd_flush(mdev);
1264
1265 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1266 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1267 if (epoch)
1268 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 }
1270
Philipp Reisner2451fc32010-08-24 13:43:11 +02001271 epoch = mdev->current_epoch;
1272 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1273
1274 D_ASSERT(atomic_read(&epoch->active) == 0);
1275 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001276
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001277 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001278 default:
1279 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001280 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001281 }
1282
1283 epoch->flags = 0;
1284 atomic_set(&epoch->epoch_size, 0);
1285 atomic_set(&epoch->active, 0);
1286
1287 spin_lock(&mdev->epoch_lock);
1288 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1289 list_add(&epoch->list, &mdev->current_epoch->list);
1290 mdev->current_epoch = epoch;
1291 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001292 } else {
1293 /* The current_epoch got recycled while we allocated this one... */
1294 kfree(epoch);
1295 }
1296 spin_unlock(&mdev->epoch_lock);
1297
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001298 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001299}
1300
1301/* used from receive_RSDataReply (recv_resync_read)
1302 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001303static struct drbd_peer_request *
1304read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1305 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001306{
Lars Ellenberg66660322010-04-06 12:15:04 +02001307 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001308 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001309 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001310 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001311 void *dig_in = mdev->tconn->int_dig_in;
1312 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001313 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001314
Philipp Reisnera0638452011-01-19 14:31:32 +01001315 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1316 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001317
1318 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001319 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001320 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001321 if (!signal_pending(current))
1322 dev_warn(DEV,
1323 "short read receiving data digest: read %d expected %d\n",
1324 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001325 return NULL;
1326 }
1327 }
1328
1329 data_size -= dgs;
1330
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001331 if (!expect(data_size != 0))
1332 return NULL;
1333 if (!expect(IS_ALIGNED(data_size, 512)))
1334 return NULL;
1335 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1336 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001337
Lars Ellenberg66660322010-04-06 12:15:04 +02001338 /* even though we trust out peer,
1339 * we sometimes have to double check. */
1340 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001341 dev_err(DEV, "request from peer beyond end of local disk: "
1342 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001343 (unsigned long long)capacity,
1344 (unsigned long long)sector, data_size);
1345 return NULL;
1346 }
1347
Philipp Reisnerb411b362009-09-25 16:07:19 -07001348 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1349 * "criss-cross" setup, that might cause write-out on some other DRBD,
1350 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001351 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1352 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001354
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001356 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001357 page_chain_for_each(page) {
1358 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001359 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001360 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001361 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001362 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1363 data[0] = data[0] ^ (unsigned long)-1;
1364 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001365 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001366 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001367 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001368 if (!signal_pending(current))
1369 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1370 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001371 return NULL;
1372 }
1373 ds -= rr;
1374 }
1375
1376 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001377 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001379 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1380 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001381 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001382 return NULL;
1383 }
1384 }
1385 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001386 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387}
1388
1389/* drbd_drain_block() just takes a data block
1390 * out of the socket input buffer, and discards it.
1391 */
1392static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1393{
1394 struct page *page;
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001395 int rr, err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396 void *data;
1397
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001398 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001399 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001400
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001401 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402
1403 data = kmap(page);
1404 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001405 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1406
1407 rr = drbd_recv(mdev->tconn, data, len);
1408 if (rr != len) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001409 if (!signal_pending(current))
1410 dev_warn(DEV,
1411 "short read receiving data: read %d expected %d\n",
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001412 rr, len);
1413 err = (rr < 0) ? rr : -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001414 break;
1415 }
1416 data_size -= rr;
1417 }
1418 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001419 drbd_pp_free(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001420 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421}
1422
1423static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1424 sector_t sector, int data_size)
1425{
1426 struct bio_vec *bvec;
1427 struct bio *bio;
1428 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001429 void *dig_in = mdev->tconn->int_dig_in;
1430 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431
Philipp Reisnera0638452011-01-19 14:31:32 +01001432 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1433 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001434
1435 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001436 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001438 if (!signal_pending(current))
1439 dev_warn(DEV,
1440 "short read receiving data reply digest: read %d expected %d\n",
1441 rr, dgs);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001442 return rr < 0 ? rr : -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001443 }
1444 }
1445
1446 data_size -= dgs;
1447
1448 /* optimistically update recv_cnt. if receiving fails below,
1449 * we disconnect anyways, and counters will be reset. */
1450 mdev->recv_cnt += data_size>>9;
1451
1452 bio = req->master_bio;
1453 D_ASSERT(sector == bio->bi_sector);
1454
1455 bio_for_each_segment(bvec, bio, i) {
1456 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001457 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458 kmap(bvec->bv_page)+bvec->bv_offset,
1459 expect);
1460 kunmap(bvec->bv_page);
1461 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001462 if (!signal_pending(current))
1463 dev_warn(DEV, "short read receiving data reply: "
1464 "read %d expected %d\n",
1465 rr, expect);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001466 return rr < 0 ? rr : -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467 }
1468 data_size -= rr;
1469 }
1470
1471 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001472 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 if (memcmp(dig_in, dig_vv, dgs)) {
1474 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001475 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 }
1477 }
1478
1479 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001480 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481}
1482
1483/* e_end_resync_block() is called via
1484 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001485static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001487 struct drbd_peer_request *peer_req =
1488 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001489 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001490 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001491 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001492
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001495 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1496 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001497 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498 } else {
1499 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001500 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001502 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503 }
1504 dec_unacked(mdev);
1505
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001506 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507}
1508
1509static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1510{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001511 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001512
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001513 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1514 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001515 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516
1517 dec_rs_pending(mdev);
1518
Philipp Reisnerb411b362009-09-25 16:07:19 -07001519 inc_unacked(mdev);
1520 /* corresponding dec_unacked() in e_end_resync_block()
1521 * respective _drbd_clear_done_ee */
1522
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001523 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001524
Philipp Reisner87eeee42011-01-19 14:16:30 +01001525 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001526 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001527 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001529 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001530 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001531 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001533 /* don't care for the reason here */
1534 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001535 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001536 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001537 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001538
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001539 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001540fail:
1541 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001542 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543}
1544
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001545static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001546find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1547 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001548{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001549 struct drbd_request *req;
1550
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001551 /* Request object according to our peer */
1552 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001553 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001554 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001555 if (!missing_ok) {
1556 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1557 (unsigned long)id, (unsigned long long)sector);
1558 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001559 return NULL;
1560}
1561
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001562static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1563 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564{
1565 struct drbd_request *req;
1566 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001567 int err;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001568 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569
1570 sector = be64_to_cpu(p->sector);
1571
Philipp Reisner87eeee42011-01-19 14:16:30 +01001572 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001573 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001574 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001575 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001576 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577
Bart Van Assche24c48302011-05-21 18:32:29 +02001578 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 * special casing it there for the various failure cases.
1580 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001581 err = recv_dless_read(mdev, req, sector, data_size);
1582 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001583 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584 /* else: nothing. handled from drbd_disconnect...
1585 * I don't think we may complete this just yet
1586 * in case we are "on-disconnect: freeze" */
1587
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001588 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589}
1590
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001591static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1592 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001593{
1594 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001595 int err;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001596 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597
1598 sector = be64_to_cpu(p->sector);
1599 D_ASSERT(p->block_id == ID_SYNCER);
1600
1601 if (get_ldev(mdev)) {
1602 /* data is submitted to disk within recv_resync_read.
1603 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001604 * or in drbd_peer_request_endio. */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001605 err = recv_resync_read(mdev, sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001606 } else {
1607 if (__ratelimit(&drbd_ratelimit_state))
1608 dev_err(DEV, "Can not write resync data to local disk.\n");
1609
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001610 err = drbd_drain_block(mdev, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001611
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001612 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613 }
1614
Philipp Reisner778f2712010-07-06 11:14:00 +02001615 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1616
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001617 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618}
1619
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001620static int w_restart_write(struct drbd_work *w, int cancel)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001621{
1622 struct drbd_request *req = container_of(w, struct drbd_request, w);
1623 struct drbd_conf *mdev = w->mdev;
1624 struct bio *bio;
1625 unsigned long start_time;
1626 unsigned long flags;
1627
1628 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1629 if (!expect(req->rq_state & RQ_POSTPONED)) {
1630 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001631 return -EIO;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001632 }
1633 bio = req->master_bio;
1634 start_time = req->start_time;
1635 /* Postponed requests will not have their master_bio completed! */
1636 __req_mod(req, DISCARD_WRITE, NULL);
1637 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1638
1639 while (__drbd_make_request(mdev, bio, start_time))
1640 /* retry */ ;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001641 return 0;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001642}
1643
1644static void restart_conflicting_writes(struct drbd_conf *mdev,
1645 sector_t sector, int size)
1646{
1647 struct drbd_interval *i;
1648 struct drbd_request *req;
1649
1650 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1651 if (!i->local)
1652 continue;
1653 req = container_of(i, struct drbd_request, i);
1654 if (req->rq_state & RQ_LOCAL_PENDING ||
1655 !(req->rq_state & RQ_POSTPONED))
1656 continue;
1657 if (expect(list_empty(&req->w.list))) {
1658 req->w.mdev = mdev;
1659 req->w.cb = w_restart_write;
1660 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1661 }
1662 }
1663}
1664
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665/* e_end_block() is called via drbd_process_done_ee().
1666 * this means this function only runs in the asender thread
1667 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001668static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001670 struct drbd_peer_request *peer_req =
1671 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001672 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001673 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001674 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675
Philipp Reisner89e58e72011-01-19 13:12:45 +01001676 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001677 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1679 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001680 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001681 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001682 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001684 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001686 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001687 /* we expect it to be marked out of sync anyways...
1688 * maybe assert this? */
1689 }
1690 dec_unacked(mdev);
1691 }
1692 /* we delete from the conflict detection hash _after_ we sent out the
1693 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001694 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001695 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001696 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1697 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001698 if (peer_req->flags & EE_RESTART_REQUESTS)
1699 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001700 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001701 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001702 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001704 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001706 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001707}
1708
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001709static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001710{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001711 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001712 struct drbd_peer_request *peer_req =
1713 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001714 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001716 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001717 dec_unacked(mdev);
1718
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001719 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001720}
1721
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001722static int e_send_discard_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001723{
1724 return e_send_ack(w, P_DISCARD_WRITE);
1725}
1726
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001727static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001728{
1729 struct drbd_tconn *tconn = w->mdev->tconn;
1730
1731 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1732 P_RETRY_WRITE : P_DISCARD_WRITE);
1733}
1734
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001735static bool seq_greater(u32 a, u32 b)
1736{
1737 /*
1738 * We assume 32-bit wrap-around here.
1739 * For 24-bit wrap-around, we would have to shift:
1740 * a <<= 8; b <<= 8;
1741 */
1742 return (s32)a - (s32)b > 0;
1743}
1744
1745static u32 seq_max(u32 a, u32 b)
1746{
1747 return seq_greater(a, b) ? a : b;
1748}
1749
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001750static bool need_peer_seq(struct drbd_conf *mdev)
1751{
1752 struct drbd_tconn *tconn = mdev->tconn;
1753
1754 /*
1755 * We only need to keep track of the last packet_seq number of our peer
1756 * if we are in dual-primary mode and we have the discard flag set; see
1757 * handle_write_conflicts().
1758 */
1759 return tconn->net_conf->two_primaries &&
1760 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1761}
1762
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001763static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001764{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001765 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001766
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001767 if (need_peer_seq(mdev)) {
1768 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001769 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1770 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001771 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001772 /* wake up only if we actually changed mdev->peer_seq */
1773 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001774 wake_up(&mdev->seq_wait);
1775 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001776}
1777
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778/* Called from receive_Data.
1779 * Synchronize packets on sock with packets on msock.
1780 *
1781 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1782 * packet traveling on msock, they are still processed in the order they have
1783 * been sent.
1784 *
1785 * Note: we don't care for Ack packets overtaking P_DATA packets.
1786 *
1787 * In case packet_seq is larger than mdev->peer_seq number, there are
1788 * outstanding packets on the msock. We wait for them to arrive.
1789 * In case we are the logically next packet, we update mdev->peer_seq
1790 * ourselves. Correctly handles 32bit wrap around.
1791 *
1792 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1793 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1794 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1795 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1796 *
1797 * returns 0 if we may process the packet,
1798 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001799static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800{
1801 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001803 int ret;
1804
1805 if (!need_peer_seq(mdev))
1806 return 0;
1807
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808 spin_lock(&mdev->peer_seq_lock);
1809 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001810 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1811 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1812 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001813 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001814 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815 if (signal_pending(current)) {
1816 ret = -ERESTARTSYS;
1817 break;
1818 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001819 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001821 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1822 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001824 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001826 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827 break;
1828 }
1829 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001831 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001832 return ret;
1833}
1834
Lars Ellenberg688593c2010-11-17 22:25:03 +01001835/* see also bio_flags_to_wire()
1836 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1837 * flags and back. We may replicate to other kernel versions. */
1838static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001839{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001840 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1841 (dpf & DP_FUA ? REQ_FUA : 0) |
1842 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1843 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001844}
1845
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001846static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1847 unsigned int size)
1848{
1849 struct drbd_interval *i;
1850
1851 repeat:
1852 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1853 struct drbd_request *req;
1854 struct bio_and_error m;
1855
1856 if (!i->local)
1857 continue;
1858 req = container_of(i, struct drbd_request, i);
1859 if (!(req->rq_state & RQ_POSTPONED))
1860 continue;
1861 req->rq_state &= ~RQ_POSTPONED;
1862 __req_mod(req, NEG_ACKED, &m);
1863 spin_unlock_irq(&mdev->tconn->req_lock);
1864 if (m.bio)
1865 complete_master_bio(mdev, &m);
1866 spin_lock_irq(&mdev->tconn->req_lock);
1867 goto repeat;
1868 }
1869}
1870
1871static int handle_write_conflicts(struct drbd_conf *mdev,
1872 struct drbd_peer_request *peer_req)
1873{
1874 struct drbd_tconn *tconn = mdev->tconn;
1875 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1876 sector_t sector = peer_req->i.sector;
1877 const unsigned int size = peer_req->i.size;
1878 struct drbd_interval *i;
1879 bool equal;
1880 int err;
1881
1882 /*
1883 * Inserting the peer request into the write_requests tree will prevent
1884 * new conflicting local requests from being added.
1885 */
1886 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1887
1888 repeat:
1889 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1890 if (i == &peer_req->i)
1891 continue;
1892
1893 if (!i->local) {
1894 /*
1895 * Our peer has sent a conflicting remote request; this
1896 * should not happen in a two-node setup. Wait for the
1897 * earlier peer request to complete.
1898 */
1899 err = drbd_wait_misc(mdev, i);
1900 if (err)
1901 goto out;
1902 goto repeat;
1903 }
1904
1905 equal = i->sector == sector && i->size == size;
1906 if (resolve_conflicts) {
1907 /*
1908 * If the peer request is fully contained within the
1909 * overlapping request, it can be discarded; otherwise,
1910 * it will be retried once all overlapping requests
1911 * have completed.
1912 */
1913 bool discard = i->sector <= sector && i->sector +
1914 (i->size >> 9) >= sector + (size >> 9);
1915
1916 if (!equal)
1917 dev_alert(DEV, "Concurrent writes detected: "
1918 "local=%llus +%u, remote=%llus +%u, "
1919 "assuming %s came first\n",
1920 (unsigned long long)i->sector, i->size,
1921 (unsigned long long)sector, size,
1922 discard ? "local" : "remote");
1923
1924 inc_unacked(mdev);
1925 peer_req->w.cb = discard ? e_send_discard_write :
1926 e_send_retry_write;
1927 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1928 wake_asender(mdev->tconn);
1929
1930 err = -ENOENT;
1931 goto out;
1932 } else {
1933 struct drbd_request *req =
1934 container_of(i, struct drbd_request, i);
1935
1936 if (!equal)
1937 dev_alert(DEV, "Concurrent writes detected: "
1938 "local=%llus +%u, remote=%llus +%u\n",
1939 (unsigned long long)i->sector, i->size,
1940 (unsigned long long)sector, size);
1941
1942 if (req->rq_state & RQ_LOCAL_PENDING ||
1943 !(req->rq_state & RQ_POSTPONED)) {
1944 /*
1945 * Wait for the node with the discard flag to
1946 * decide if this request will be discarded or
1947 * retried. Requests that are discarded will
1948 * disappear from the write_requests tree.
1949 *
1950 * In addition, wait for the conflicting
1951 * request to finish locally before submitting
1952 * the conflicting peer request.
1953 */
1954 err = drbd_wait_misc(mdev, &req->i);
1955 if (err) {
1956 _conn_request_state(mdev->tconn,
1957 NS(conn, C_TIMEOUT),
1958 CS_HARD);
1959 fail_postponed_requests(mdev, sector, size);
1960 goto out;
1961 }
1962 goto repeat;
1963 }
1964 /*
1965 * Remember to restart the conflicting requests after
1966 * the new peer request has completed.
1967 */
1968 peer_req->flags |= EE_RESTART_REQUESTS;
1969 }
1970 }
1971 err = 0;
1972
1973 out:
1974 if (err)
1975 drbd_remove_epoch_entry_interval(mdev, peer_req);
1976 return err;
1977}
1978
Philipp Reisnerb411b362009-09-25 16:07:19 -07001979/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001980static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1981 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982{
1983 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001984 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001985 struct p_data *p = &mdev->tconn->data.rbuf.data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001986 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001987 int rw = WRITE;
1988 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001989 int err;
1990
Philipp Reisnerb411b362009-09-25 16:07:19 -07001991 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001992 int err2;
1993
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001994 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001995 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001996 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001997 err2 = drbd_drain_block(mdev, data_size);
1998 if (!err)
1999 err = err2;
2000 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001 }
2002
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002003 /*
2004 * Corresponding put_ldev done either below (on various errors), or in
2005 * drbd_peer_request_endio, if we successfully submit the data at the
2006 * end of this function.
2007 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002008
2009 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002010 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
2011 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002012 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002013 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014 }
2015
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002016 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002017
Lars Ellenberg688593c2010-11-17 22:25:03 +01002018 dp_flags = be32_to_cpu(p->dp_flags);
2019 rw |= wire_flags_to_bio(mdev, dp_flags);
2020
2021 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002022 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002023
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002025 peer_req->epoch = mdev->current_epoch;
2026 atomic_inc(&peer_req->epoch->epoch_size);
2027 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028 spin_unlock(&mdev->epoch_lock);
2029
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002030 if (mdev->tconn->net_conf->two_primaries) {
2031 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2032 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002033 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002034 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002035 err = handle_write_conflicts(mdev, peer_req);
2036 if (err) {
2037 spin_unlock_irq(&mdev->tconn->req_lock);
2038 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002039 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002040 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002042 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002043 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002044 } else
2045 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002046 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002047 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002048
Philipp Reisner89e58e72011-01-19 13:12:45 +01002049 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002050 case DRBD_PROT_C:
2051 inc_unacked(mdev);
2052 /* corresponding dec_unacked() in e_end_block()
2053 * respective _drbd_clear_done_ee */
2054 break;
2055 case DRBD_PROT_B:
2056 /* I really don't like it that the receiver thread
2057 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002058 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059 break;
2060 case DRBD_PROT_A:
2061 /* nothing to do */
2062 break;
2063 }
2064
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002065 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002067 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2068 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2069 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2070 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071 }
2072
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002073 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2074 if (!err)
2075 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002076
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002077 /* don't care for the reason here */
2078 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002079 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002080 list_del(&peer_req->w.list);
2081 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002082 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002083 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
2084 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002085
Philipp Reisnerb411b362009-09-25 16:07:19 -07002086out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002087 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002089 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002090 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091}
2092
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002093/* We may throttle resync, if the lower device seems to be busy,
2094 * and current sync rate is above c_min_rate.
2095 *
2096 * To decide whether or not the lower device is busy, we use a scheme similar
2097 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2098 * (more than 64 sectors) of activity we cannot account for with our own resync
2099 * activity, it obviously is "busy".
2100 *
2101 * The current sync rate used here uses only the most recent two step marks,
2102 * to have a short time average so we can react faster.
2103 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002104int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002105{
2106 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2107 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002108 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002109 int curr_events;
2110 int throttle = 0;
2111
2112 /* feature disabled? */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002113 if (mdev->ldev->dc.c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002114 return 0;
2115
Philipp Reisnere3555d82010-11-07 15:56:29 +01002116 spin_lock_irq(&mdev->al_lock);
2117 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2118 if (tmp) {
2119 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2120 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2121 spin_unlock_irq(&mdev->al_lock);
2122 return 0;
2123 }
2124 /* Do not slow down if app IO is already waiting for this extent */
2125 }
2126 spin_unlock_irq(&mdev->al_lock);
2127
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002128 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2129 (int)part_stat_read(&disk->part0, sectors[1]) -
2130 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002131
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002132 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2133 unsigned long rs_left;
2134 int i;
2135
2136 mdev->rs_last_events = curr_events;
2137
2138 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2139 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002140 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2141
2142 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2143 rs_left = mdev->ov_left;
2144 else
2145 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002146
2147 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2148 if (!dt)
2149 dt++;
2150 db = mdev->rs_mark_left[i] - rs_left;
2151 dbdt = Bit2KB(db/dt);
2152
Lars Ellenbergf3990022011-03-23 14:31:09 +01002153 if (dbdt > mdev->ldev->dc.c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002154 throttle = 1;
2155 }
2156 return throttle;
2157}
2158
2159
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002160static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2161 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002162{
2163 sector_t sector;
2164 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002165 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002166 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002167 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002168 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002169 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002170
2171 sector = be64_to_cpu(p->sector);
2172 size = be32_to_cpu(p->blksize);
2173
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002174 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002175 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2176 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002177 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178 }
2179 if (sector + (size>>9) > capacity) {
2180 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2181 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002182 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183 }
2184
2185 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002186 verb = 1;
2187 switch (cmd) {
2188 case P_DATA_REQUEST:
2189 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2190 break;
2191 case P_RS_DATA_REQUEST:
2192 case P_CSUM_RS_REQUEST:
2193 case P_OV_REQUEST:
2194 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2195 break;
2196 case P_OV_REPLY:
2197 verb = 0;
2198 dec_rs_pending(mdev);
2199 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2200 break;
2201 default:
2202 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2203 cmdname(cmd));
2204 }
2205 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206 dev_err(DEV, "Can not satisfy peer's read request, "
2207 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002208
Lars Ellenberga821cc42010-09-06 12:31:37 +02002209 /* drain possibly payload */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002210 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002211 }
2212
2213 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2214 * "criss-cross" setup, that might cause write-out on some other DRBD,
2215 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002216 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2217 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002218 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002219 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220 }
2221
Philipp Reisner02918be2010-08-20 14:35:10 +02002222 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002224 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002226 /* application IO, don't drbd_rs_begin_io */
2227 goto submit;
2228
Philipp Reisnerb411b362009-09-25 16:07:19 -07002229 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002230 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002232 /* used in the sector offset progress display */
2233 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002234 break;
2235
2236 case P_OV_REPLY:
2237 case P_CSUM_RS_REQUEST:
2238 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002239 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2240 if (!di)
2241 goto out_free_e;
2242
2243 di->digest_size = digest_size;
2244 di->digest = (((char *)di)+sizeof(struct digest_info));
2245
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002246 peer_req->digest = di;
2247 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002248
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002249 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250 goto out_free_e;
2251
Philipp Reisner02918be2010-08-20 14:35:10 +02002252 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002253 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002254 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002255 /* used in the sector offset progress display */
2256 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002257 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002258 /* track progress, we may need to throttle */
2259 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002260 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002261 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002262 /* drbd_rs_begin_io done when we sent this request,
2263 * but accounting still needs to be done. */
2264 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 }
2266 break;
2267
2268 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002269 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002270 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002271 unsigned long now = jiffies;
2272 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002273 mdev->ov_start_sector = sector;
2274 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002275 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2276 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002277 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2278 mdev->rs_mark_left[i] = mdev->ov_left;
2279 mdev->rs_mark_time[i] = now;
2280 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281 dev_info(DEV, "Online Verify start sector: %llu\n",
2282 (unsigned long long)sector);
2283 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002284 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002286 break;
2287
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 default:
2289 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002290 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002291 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002292 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293 }
2294
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002295 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2296 * wrt the receiver, but it is not as straightforward as it may seem.
2297 * Various places in the resync start and stop logic assume resync
2298 * requests are processed in order, requeuing this on the worker thread
2299 * introduces a bunch of new code for synchronization between threads.
2300 *
2301 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2302 * "forever", throttling after drbd_rs_begin_io will lock that extent
2303 * for application writes for the same time. For now, just throttle
2304 * here, where the rest of the code expects the receiver to sleep for
2305 * a while, anyways.
2306 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002307
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002308 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2309 * this defers syncer requests for some time, before letting at least
2310 * on request through. The resync controller on the receiving side
2311 * will adapt to the incoming rate accordingly.
2312 *
2313 * We cannot throttle here if remote is Primary/SyncTarget:
2314 * we would also throttle its application reads.
2315 * In that case, throttling is done on the SyncTarget only.
2316 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002317 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2318 schedule_timeout_uninterruptible(HZ/10);
2319 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002320 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002321
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002322submit_for_resync:
2323 atomic_add(size >> 9, &mdev->rs_sect_ev);
2324
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002325submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002327 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002328 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002329 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002330
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002331 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002332 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002333
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002334 /* don't care for the reason here */
2335 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002336 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002337 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002338 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002339 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2340
Philipp Reisnerb411b362009-09-25 16:07:19 -07002341out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002342 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002343 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002344 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345}
2346
2347static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2348{
2349 int self, peer, rv = -100;
2350 unsigned long ch_self, ch_peer;
2351
2352 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2353 peer = mdev->p_uuid[UI_BITMAP] & 1;
2354
2355 ch_peer = mdev->p_uuid[UI_SIZE];
2356 ch_self = mdev->comm_bm_set;
2357
Philipp Reisner89e58e72011-01-19 13:12:45 +01002358 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002359 case ASB_CONSENSUS:
2360 case ASB_DISCARD_SECONDARY:
2361 case ASB_CALL_HELPER:
2362 dev_err(DEV, "Configuration error.\n");
2363 break;
2364 case ASB_DISCONNECT:
2365 break;
2366 case ASB_DISCARD_YOUNGER_PRI:
2367 if (self == 0 && peer == 1) {
2368 rv = -1;
2369 break;
2370 }
2371 if (self == 1 && peer == 0) {
2372 rv = 1;
2373 break;
2374 }
2375 /* Else fall through to one of the other strategies... */
2376 case ASB_DISCARD_OLDER_PRI:
2377 if (self == 0 && peer == 1) {
2378 rv = 1;
2379 break;
2380 }
2381 if (self == 1 && peer == 0) {
2382 rv = -1;
2383 break;
2384 }
2385 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002386 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387 "Using discard-least-changes instead\n");
2388 case ASB_DISCARD_ZERO_CHG:
2389 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002390 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002391 ? -1 : 1;
2392 break;
2393 } else {
2394 if (ch_peer == 0) { rv = 1; break; }
2395 if (ch_self == 0) { rv = -1; break; }
2396 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002397 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002398 break;
2399 case ASB_DISCARD_LEAST_CHG:
2400 if (ch_self < ch_peer)
2401 rv = -1;
2402 else if (ch_self > ch_peer)
2403 rv = 1;
2404 else /* ( ch_self == ch_peer ) */
2405 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002406 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002407 ? -1 : 1;
2408 break;
2409 case ASB_DISCARD_LOCAL:
2410 rv = -1;
2411 break;
2412 case ASB_DISCARD_REMOTE:
2413 rv = 1;
2414 }
2415
2416 return rv;
2417}
2418
2419static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2420{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002421 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422
Philipp Reisner89e58e72011-01-19 13:12:45 +01002423 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424 case ASB_DISCARD_YOUNGER_PRI:
2425 case ASB_DISCARD_OLDER_PRI:
2426 case ASB_DISCARD_LEAST_CHG:
2427 case ASB_DISCARD_LOCAL:
2428 case ASB_DISCARD_REMOTE:
2429 dev_err(DEV, "Configuration error.\n");
2430 break;
2431 case ASB_DISCONNECT:
2432 break;
2433 case ASB_CONSENSUS:
2434 hg = drbd_asb_recover_0p(mdev);
2435 if (hg == -1 && mdev->state.role == R_SECONDARY)
2436 rv = hg;
2437 if (hg == 1 && mdev->state.role == R_PRIMARY)
2438 rv = hg;
2439 break;
2440 case ASB_VIOLENTLY:
2441 rv = drbd_asb_recover_0p(mdev);
2442 break;
2443 case ASB_DISCARD_SECONDARY:
2444 return mdev->state.role == R_PRIMARY ? 1 : -1;
2445 case ASB_CALL_HELPER:
2446 hg = drbd_asb_recover_0p(mdev);
2447 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002448 enum drbd_state_rv rv2;
2449
2450 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2452 * we might be here in C_WF_REPORT_PARAMS which is transient.
2453 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002454 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2455 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002456 drbd_khelper(mdev, "pri-lost-after-sb");
2457 } else {
2458 dev_warn(DEV, "Successfully gave up primary role.\n");
2459 rv = hg;
2460 }
2461 } else
2462 rv = hg;
2463 }
2464
2465 return rv;
2466}
2467
2468static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2469{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002470 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002471
Philipp Reisner89e58e72011-01-19 13:12:45 +01002472 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473 case ASB_DISCARD_YOUNGER_PRI:
2474 case ASB_DISCARD_OLDER_PRI:
2475 case ASB_DISCARD_LEAST_CHG:
2476 case ASB_DISCARD_LOCAL:
2477 case ASB_DISCARD_REMOTE:
2478 case ASB_CONSENSUS:
2479 case ASB_DISCARD_SECONDARY:
2480 dev_err(DEV, "Configuration error.\n");
2481 break;
2482 case ASB_VIOLENTLY:
2483 rv = drbd_asb_recover_0p(mdev);
2484 break;
2485 case ASB_DISCONNECT:
2486 break;
2487 case ASB_CALL_HELPER:
2488 hg = drbd_asb_recover_0p(mdev);
2489 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002490 enum drbd_state_rv rv2;
2491
Philipp Reisnerb411b362009-09-25 16:07:19 -07002492 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2493 * we might be here in C_WF_REPORT_PARAMS which is transient.
2494 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002495 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2496 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002497 drbd_khelper(mdev, "pri-lost-after-sb");
2498 } else {
2499 dev_warn(DEV, "Successfully gave up primary role.\n");
2500 rv = hg;
2501 }
2502 } else
2503 rv = hg;
2504 }
2505
2506 return rv;
2507}
2508
2509static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2510 u64 bits, u64 flags)
2511{
2512 if (!uuid) {
2513 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2514 return;
2515 }
2516 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2517 text,
2518 (unsigned long long)uuid[UI_CURRENT],
2519 (unsigned long long)uuid[UI_BITMAP],
2520 (unsigned long long)uuid[UI_HISTORY_START],
2521 (unsigned long long)uuid[UI_HISTORY_END],
2522 (unsigned long long)bits,
2523 (unsigned long long)flags);
2524}
2525
2526/*
2527 100 after split brain try auto recover
2528 2 C_SYNC_SOURCE set BitMap
2529 1 C_SYNC_SOURCE use BitMap
2530 0 no Sync
2531 -1 C_SYNC_TARGET use BitMap
2532 -2 C_SYNC_TARGET set BitMap
2533 -100 after split brain, disconnect
2534-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002535-1091 requires proto 91
2536-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537 */
2538static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2539{
2540 u64 self, peer;
2541 int i, j;
2542
2543 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2544 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2545
2546 *rule_nr = 10;
2547 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2548 return 0;
2549
2550 *rule_nr = 20;
2551 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2552 peer != UUID_JUST_CREATED)
2553 return -2;
2554
2555 *rule_nr = 30;
2556 if (self != UUID_JUST_CREATED &&
2557 (peer == UUID_JUST_CREATED || peer == (u64)0))
2558 return 2;
2559
2560 if (self == peer) {
2561 int rct, dc; /* roles at crash time */
2562
2563 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2564
Philipp Reisner31890f42011-01-19 14:12:51 +01002565 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002566 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002567
2568 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2569 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2570 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2571 drbd_uuid_set_bm(mdev, 0UL);
2572
2573 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2574 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2575 *rule_nr = 34;
2576 } else {
2577 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2578 *rule_nr = 36;
2579 }
2580
2581 return 1;
2582 }
2583
2584 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2585
Philipp Reisner31890f42011-01-19 14:12:51 +01002586 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002587 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002588
2589 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2590 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2591 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2592
2593 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2594 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2595 mdev->p_uuid[UI_BITMAP] = 0UL;
2596
2597 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2598 *rule_nr = 35;
2599 } else {
2600 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2601 *rule_nr = 37;
2602 }
2603
2604 return -1;
2605 }
2606
2607 /* Common power [off|failure] */
2608 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2609 (mdev->p_uuid[UI_FLAGS] & 2);
2610 /* lowest bit is set when we were primary,
2611 * next bit (weight 2) is set when peer was primary */
2612 *rule_nr = 40;
2613
2614 switch (rct) {
2615 case 0: /* !self_pri && !peer_pri */ return 0;
2616 case 1: /* self_pri && !peer_pri */ return 1;
2617 case 2: /* !self_pri && peer_pri */ return -1;
2618 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002619 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002620 return dc ? -1 : 1;
2621 }
2622 }
2623
2624 *rule_nr = 50;
2625 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2626 if (self == peer)
2627 return -1;
2628
2629 *rule_nr = 51;
2630 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2631 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002632 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002633 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2634 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2635 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636 /* The last P_SYNC_UUID did not get though. Undo the last start of
2637 resync as sync source modifications of the peer's UUIDs. */
2638
Philipp Reisner31890f42011-01-19 14:12:51 +01002639 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002640 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002641
2642 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2643 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002644
2645 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2646 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2647
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648 return -1;
2649 }
2650 }
2651
2652 *rule_nr = 60;
2653 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2654 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2655 peer = mdev->p_uuid[i] & ~((u64)1);
2656 if (self == peer)
2657 return -2;
2658 }
2659
2660 *rule_nr = 70;
2661 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2662 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2663 if (self == peer)
2664 return 1;
2665
2666 *rule_nr = 71;
2667 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2668 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002669 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002670 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2671 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2672 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002673 /* The last P_SYNC_UUID did not get though. Undo the last start of
2674 resync as sync source modifications of our UUIDs. */
2675
Philipp Reisner31890f42011-01-19 14:12:51 +01002676 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002677 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678
2679 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2680 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2681
Philipp Reisner4a23f262011-01-11 17:42:17 +01002682 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002683 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2684 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2685
2686 return 1;
2687 }
2688 }
2689
2690
2691 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002692 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2694 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2695 if (self == peer)
2696 return 2;
2697 }
2698
2699 *rule_nr = 90;
2700 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2701 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2702 if (self == peer && self != ((u64)0))
2703 return 100;
2704
2705 *rule_nr = 100;
2706 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2707 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2708 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2709 peer = mdev->p_uuid[j] & ~((u64)1);
2710 if (self == peer)
2711 return -100;
2712 }
2713 }
2714
2715 return -1000;
2716}
2717
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK on failure.
 */
2721static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2722 enum drbd_disk_state peer_disk) __must_hold(local)
2723{
2724 int hg, rule_nr;
2725 enum drbd_conns rv = C_MASK;
2726 enum drbd_disk_state mydisk;
2727
2728 mydisk = mdev->state.disk;
2729 if (mydisk == D_NEGOTIATING)
2730 mydisk = mdev->new_state_tmp.disk;
2731
2732 dev_info(DEV, "drbd_sync_handshake:\n");
2733 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2734 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2735 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2736
2737 hg = drbd_uuid_compare(mdev, &rule_nr);
2738
2739 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2740
2741 if (hg == -1000) {
2742 dev_alert(DEV, "Unrelated data, aborting!\n");
2743 return C_MASK;
2744 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002745 if (hg < -1000) {
2746 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 return C_MASK;
2748 }
2749
2750 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2751 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2752 int f = (hg == -100) || abs(hg) == 2;
2753 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2754 if (f)
2755 hg = hg*2;
2756 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2757 hg > 0 ? "source" : "target");
2758 }
2759
Adam Gandelman3a11a482010-04-08 16:48:23 -07002760 if (abs(hg) == 100)
2761 drbd_khelper(mdev, "initial-split-brain");
2762
Philipp Reisner89e58e72011-01-19 13:12:45 +01002763 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002764 int pcount = (mdev->state.role == R_PRIMARY)
2765 + (peer_role == R_PRIMARY);
2766 int forced = (hg == -100);
2767
2768 switch (pcount) {
2769 case 0:
2770 hg = drbd_asb_recover_0p(mdev);
2771 break;
2772 case 1:
2773 hg = drbd_asb_recover_1p(mdev);
2774 break;
2775 case 2:
2776 hg = drbd_asb_recover_2p(mdev);
2777 break;
2778 }
2779 if (abs(hg) < 100) {
2780 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2781 "automatically solved. Sync from %s node\n",
2782 pcount, (hg < 0) ? "peer" : "this");
2783 if (forced) {
2784 dev_warn(DEV, "Doing a full sync, since"
2785 " UUIDs where ambiguous.\n");
2786 hg = hg*2;
2787 }
2788 }
2789 }
2790
2791 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002792 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002794 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795 hg = 1;
2796
2797 if (abs(hg) < 100)
2798 dev_warn(DEV, "Split-Brain detected, manually solved. "
2799 "Sync from %s node\n",
2800 (hg < 0) ? "peer" : "this");
2801 }
2802
2803 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002804 /* FIXME this log message is not correct if we end up here
2805 * after an attempted attach on a diskless node.
2806 * We just refuse to attach -- well, we drop the "connection"
2807 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002808 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809 drbd_khelper(mdev, "split-brain");
2810 return C_MASK;
2811 }
2812
2813 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2814 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2815 return C_MASK;
2816 }
2817
2818 if (hg < 0 && /* by intention we do not use mydisk here. */
2819 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002820 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002821 case ASB_CALL_HELPER:
2822 drbd_khelper(mdev, "pri-lost");
2823 /* fall through */
2824 case ASB_DISCONNECT:
2825 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2826 return C_MASK;
2827 case ASB_VIOLENTLY:
2828 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2829 "assumption\n");
2830 }
2831 }
2832
Philipp Reisner8169e412011-03-15 18:40:27 +01002833 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002834 if (hg == 0)
2835 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2836 else
2837 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2838 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2839 abs(hg) >= 2 ? "full" : "bit-map based");
2840 return C_MASK;
2841 }
2842
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843 if (abs(hg) >= 2) {
2844 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002845 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2846 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002847 return C_MASK;
2848 }
2849
2850 if (hg > 0) { /* become sync source. */
2851 rv = C_WF_BITMAP_S;
2852 } else if (hg < 0) { /* become sync target */
2853 rv = C_WF_BITMAP_T;
2854 } else {
2855 rv = C_CONNECTED;
2856 if (drbd_bm_total_weight(mdev)) {
2857 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2858 drbd_bm_total_weight(mdev));
2859 }
2860 }
2861
2862 return rv;
2863}
2864
2865/* returns 1 if invalid */
2866static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2867{
2868 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2869 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2870 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2871 return 0;
2872
2873 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2874 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2875 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2876 return 1;
2877
2878 /* everything else is valid if they are equal on both sides. */
2879 if (peer == self)
2880 return 0;
2881
2882 /* everything es is invalid. */
2883 return 1;
2884}
2885
Philipp Reisner72046242011-03-15 18:51:47 +01002886static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd,
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002887 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888{
Philipp Reisner72046242011-03-15 18:51:47 +01002889 struct p_protocol *p = &tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002891 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2893
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894 p_proto = be32_to_cpu(p->protocol);
2895 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2896 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2897 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002899 cf = be32_to_cpu(p->conn_flags);
2900 p_want_lose = cf & CF_WANT_LOSE;
2901
Philipp Reisner72046242011-03-15 18:51:47 +01002902 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002903
2904 if (cf & CF_DRY_RUN)
Philipp Reisner72046242011-03-15 18:51:47 +01002905 set_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002906
Philipp Reisner72046242011-03-15 18:51:47 +01002907 if (p_proto != tconn->net_conf->wire_protocol) {
2908 conn_err(tconn, "incompatible communication protocols\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909 goto disconnect;
2910 }
2911
Philipp Reisner72046242011-03-15 18:51:47 +01002912 if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
2913 conn_err(tconn, "incompatible after-sb-0pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914 goto disconnect;
2915 }
2916
Philipp Reisner72046242011-03-15 18:51:47 +01002917 if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
2918 conn_err(tconn, "incompatible after-sb-1pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 goto disconnect;
2920 }
2921
Philipp Reisner72046242011-03-15 18:51:47 +01002922 if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
2923 conn_err(tconn, "incompatible after-sb-2pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002924 goto disconnect;
2925 }
2926
Philipp Reisner72046242011-03-15 18:51:47 +01002927 if (p_want_lose && tconn->net_conf->want_lose) {
2928 conn_err(tconn, "both sides have the 'want_lose' flag set\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929 goto disconnect;
2930 }
2931
Philipp Reisner72046242011-03-15 18:51:47 +01002932 if (p_two_primaries != tconn->net_conf->two_primaries) {
2933 conn_err(tconn, "incompatible setting of the two-primaries options\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002934 goto disconnect;
2935 }
2936
Philipp Reisner72046242011-03-15 18:51:47 +01002937 if (tconn->agreed_pro_version >= 87) {
2938 unsigned char *my_alg = tconn->net_conf->integrity_alg;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002939 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002940
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002941 err = drbd_recv_all(tconn, p_integrity_alg, data_size);
2942 if (err)
2943 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002944
2945 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2946 if (strcmp(p_integrity_alg, my_alg)) {
Philipp Reisner72046242011-03-15 18:51:47 +01002947 conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002948 goto disconnect;
2949 }
Philipp Reisner72046242011-03-15 18:51:47 +01002950 conn_info(tconn, "data-integrity-alg: %s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002951 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2952 }
2953
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002954 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002955
2956disconnect:
Philipp Reisner72046242011-03-15 18:51:47 +01002957 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002958 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002959}
2960
2961/* helper function
2962 * input: alg name, feature name
2963 * return: NULL (alg name was "")
2964 * ERR_PTR(error) if something goes wrong
2965 * or the crypto hash ptr, if it worked out ok. */
2966struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2967 const char *alg, const char *name)
2968{
2969 struct crypto_hash *tfm;
2970
2971 if (!alg[0])
2972 return NULL;
2973
2974 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2975 if (IS_ERR(tfm)) {
2976 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2977 alg, name, PTR_ERR(tfm));
2978 return tfm;
2979 }
2980 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2981 crypto_free_hash(tfm);
2982 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2983 return ERR_PTR(-EINVAL);
2984 }
2985 return tfm;
2986}
2987
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002988static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2989 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002990{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002991 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992 unsigned int header_size, data_size, exp_max_sz;
2993 struct crypto_hash *verify_tfm = NULL;
2994 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002995 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002996 int *rs_plan_s = NULL;
2997 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002998 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002999
3000 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3001 : apv == 88 ? sizeof(struct p_rs_param)
3002 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003003 : apv <= 94 ? sizeof(struct p_rs_param_89)
3004 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005
Philipp Reisner02918be2010-08-20 14:35:10 +02003006 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003007 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02003008 packet_size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003009 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010 }
3011
3012 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003013 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003014 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003015 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003016 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003017 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003018 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003019 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01003020 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02003021 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003022 D_ASSERT(data_size == 0);
3023 }
3024
3025 /* initialize verify_alg and csums_alg */
3026 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3027
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003028 err = drbd_recv_all(mdev->tconn, &p->head.payload, header_size);
3029 if (err)
3030 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031
Lars Ellenbergf3990022011-03-23 14:31:09 +01003032 if (get_ldev(mdev)) {
3033 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3034 put_ldev(mdev);
3035 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003036
3037 if (apv >= 88) {
3038 if (apv == 88) {
3039 if (data_size > SHARED_SECRET_MAX) {
3040 dev_err(DEV, "verify-alg too long, "
3041 "peer wants %u, accepting only %u byte\n",
3042 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003043 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 }
3045
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003046 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
3047 if (err)
3048 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003049
3050 /* we expect NUL terminated string */
3051 /* but just in case someone tries to be evil */
3052 D_ASSERT(p->verify_alg[data_size-1] == 0);
3053 p->verify_alg[data_size-1] = 0;
3054
3055 } else /* apv >= 89 */ {
3056 /* we still expect NUL terminated strings */
3057 /* but just in case someone tries to be evil */
3058 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3059 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3060 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3061 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3062 }
3063
Lars Ellenbergf3990022011-03-23 14:31:09 +01003064 if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3066 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003067 mdev->tconn->net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 goto disconnect;
3069 }
3070 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3071 p->verify_alg, "verify-alg");
3072 if (IS_ERR(verify_tfm)) {
3073 verify_tfm = NULL;
3074 goto disconnect;
3075 }
3076 }
3077
Lars Ellenbergf3990022011-03-23 14:31:09 +01003078 if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003079 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3080 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Lars Ellenbergf3990022011-03-23 14:31:09 +01003081 mdev->tconn->net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003082 goto disconnect;
3083 }
3084 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3085 p->csums_alg, "csums-alg");
3086 if (IS_ERR(csums_tfm)) {
3087 csums_tfm = NULL;
3088 goto disconnect;
3089 }
3090 }
3091
Lars Ellenbergf3990022011-03-23 14:31:09 +01003092 if (apv > 94 && get_ldev(mdev)) {
3093 mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
3094 mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3095 mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
3096 mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
3097 mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003098
Lars Ellenbergf3990022011-03-23 14:31:09 +01003099 fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +02003100 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3101 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3102 if (!rs_plan_s) {
3103 dev_err(DEV, "kmalloc of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003104 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003105 goto disconnect;
3106 }
3107 }
Lars Ellenbergf3990022011-03-23 14:31:09 +01003108 put_ldev(mdev);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003109 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110
3111 spin_lock(&mdev->peer_seq_lock);
3112 /* lock against drbd_nl_syncer_conf() */
3113 if (verify_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003114 strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
3115 mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3116 crypto_free_hash(mdev->tconn->verify_tfm);
3117 mdev->tconn->verify_tfm = verify_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3119 }
3120 if (csums_tfm) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01003121 strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
3122 mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3123 crypto_free_hash(mdev->tconn->csums_tfm);
3124 mdev->tconn->csums_tfm = csums_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003125 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3126 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003127 if (fifo_size != mdev->rs_plan_s.size) {
3128 kfree(mdev->rs_plan_s.values);
3129 mdev->rs_plan_s.values = rs_plan_s;
3130 mdev->rs_plan_s.size = fifo_size;
3131 mdev->rs_planed = 0;
3132 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003133 spin_unlock(&mdev->peer_seq_lock);
3134 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003135 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003136
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137disconnect:
3138 /* just for completeness: actually not needed,
3139 * as this is not reached if csums_tfm was ok. */
3140 crypto_free_hash(csums_tfm);
3141 /* but free the verify_tfm again, if csums_tfm did not work out */
3142 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003143 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003144 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003145}
3146
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147/* warn if the arguments differ by more than 12.5% */
3148static void warn_if_differ_considerably(struct drbd_conf *mdev,
3149 const char *s, sector_t a, sector_t b)
3150{
3151 sector_t d;
3152 if (a == 0 || b == 0)
3153 return;
3154 d = (a > b) ? (a - b) : (b - a);
3155 if (d > (a>>3) || d > (b>>3))
3156 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3157 (unsigned long long)a, (unsigned long long)b);
3158}
3159
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003160static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3161 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003162{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003163 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 sector_t p_size, p_usize, my_usize;
3166 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003167 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 p_size = be64_to_cpu(p->d_size);
3170 p_usize = be64_to_cpu(p->u_size);
3171
Philipp Reisnerb411b362009-09-25 16:07:19 -07003172 /* just store the peer's disk size for now.
3173 * we still need to figure out whether we accept that. */
3174 mdev->p_size = p_size;
3175
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176 if (get_ldev(mdev)) {
3177 warn_if_differ_considerably(mdev, "lower level device sizes",
3178 p_size, drbd_get_max_capacity(mdev->ldev));
3179 warn_if_differ_considerably(mdev, "user requested size",
3180 p_usize, mdev->ldev->dc.disk_size);
3181
3182 /* if this is the first connect, or an otherwise expected
3183 * param exchange, choose the minimum */
3184 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3185 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3186 p_usize);
3187
3188 my_usize = mdev->ldev->dc.disk_size;
3189
3190 if (mdev->ldev->dc.disk_size != p_usize) {
3191 mdev->ldev->dc.disk_size = p_usize;
3192 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3193 (unsigned long)mdev->ldev->dc.disk_size);
3194 }
3195
3196 /* Never shrink a device with usable data during connect.
3197 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003198 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003199 drbd_get_capacity(mdev->this_bdev) &&
3200 mdev->state.disk >= D_OUTDATED &&
3201 mdev->state.conn < C_CONNECTED) {
3202 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003203 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003204 mdev->ldev->dc.disk_size = my_usize;
3205 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003206 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207 }
3208 put_ldev(mdev);
3209 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210
Philipp Reisnere89b5912010-03-24 17:11:33 +01003211 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003213 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 put_ldev(mdev);
3215 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003216 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217 drbd_md_sync(mdev);
3218 } else {
3219 /* I am diskless, need to accept the peer's size. */
3220 drbd_set_my_capacity(mdev, p_size);
3221 }
3222
Philipp Reisner99432fc2011-05-20 16:39:13 +02003223 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3224 drbd_reconsider_max_bio_size(mdev);
3225
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226 if (get_ldev(mdev)) {
3227 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3228 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3229 ldsc = 1;
3230 }
3231
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232 put_ldev(mdev);
3233 }
3234
3235 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3236 if (be64_to_cpu(p->c_size) !=
3237 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3238 /* we have different sizes, probably peer
3239 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003240 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 }
3242 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3243 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3244 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003245 mdev->state.disk >= D_INCONSISTENT) {
3246 if (ddsf & DDSF_NO_RESYNC)
3247 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3248 else
3249 resync_after_online_grow(mdev);
3250 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3252 }
3253 }
3254
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003255 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256}
3257
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003258static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3259 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003261 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003263 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003264
Philipp Reisnerb411b362009-09-25 16:07:19 -07003265 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3266
3267 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3268 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3269
3270 kfree(mdev->p_uuid);
3271 mdev->p_uuid = p_uuid;
3272
3273 if (mdev->state.conn < C_CONNECTED &&
3274 mdev->state.disk < D_INCONSISTENT &&
3275 mdev->state.role == R_PRIMARY &&
3276 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3277 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3278 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003279 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003280 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281 }
3282
3283 if (get_ldev(mdev)) {
3284 int skip_initial_sync =
3285 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003286 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3288 (p_uuid[UI_FLAGS] & 8);
3289 if (skip_initial_sync) {
3290 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3291 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003292 "clear_n_write from receive_uuids",
3293 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003294 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3295 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3296 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3297 CS_VERBOSE, NULL);
3298 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003299 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003300 }
3301 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003302 } else if (mdev->state.disk < D_INCONSISTENT &&
3303 mdev->state.role == R_PRIMARY) {
3304 /* I am a diskless primary, the peer just created a new current UUID
3305 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003306 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003307 }
3308
3309 /* Before we test for the disk state, we should wait until an eventually
3310 ongoing cluster wide state change is finished. That is important if
3311 we are primary and are detaching from our disk. We need to see the
3312 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003313 mutex_lock(mdev->state_mutex);
3314 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003315 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003316 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3317
3318 if (updated_uuids)
3319 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003320
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003321 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322}
3323
3324/**
3325 * convert_state() - Converts the peer's view of the cluster state to our point of view
3326 * @ps: The state as seen by the peer.
3327 */
3328static union drbd_state convert_state(union drbd_state ps)
3329{
3330 union drbd_state ms;
3331
3332 static enum drbd_conns c_tab[] = {
3333 [C_CONNECTED] = C_CONNECTED,
3334
3335 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3336 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3337 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3338 [C_VERIFY_S] = C_VERIFY_T,
3339 [C_MASK] = C_MASK,
3340 };
3341
3342 ms.i = ps.i;
3343
3344 ms.conn = c_tab[ps.conn];
3345 ms.peer = ps.role;
3346 ms.role = ps.peer;
3347 ms.pdsk = ps.disk;
3348 ms.disk = ps.pdsk;
3349 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3350
3351 return ms;
3352}
3353
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003354static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3355 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003356{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003357 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003358 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003359 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003360
Philipp Reisnerb411b362009-09-25 16:07:19 -07003361 mask.i = be32_to_cpu(p->mask);
3362 val.i = be32_to_cpu(p->val);
3363
Philipp Reisner25703f82011-02-07 14:35:25 +01003364 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003365 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003367 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368 }
3369
3370 mask = convert_state(mask);
3371 val = convert_state(val);
3372
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003373 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3374 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003375
Philipp Reisnerb411b362009-09-25 16:07:19 -07003376 drbd_md_sync(mdev);
3377
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003378 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003379}
3380
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003381static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd,
3382 unsigned int data_size)
3383{
3384 struct p_req_state *p = &tconn->data.rbuf.req_state;
3385 union drbd_state mask, val;
3386 enum drbd_state_rv rv;
3387
3388 mask.i = be32_to_cpu(p->mask);
3389 val.i = be32_to_cpu(p->val);
3390
3391 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3392 mutex_is_locked(&tconn->cstate_mutex)) {
3393 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003394 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003395 }
3396
3397 mask = convert_state(mask);
3398 val = convert_state(val);
3399
3400 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3401 conn_send_sr_reply(tconn, rv);
3402
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003403 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003404}
3405
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003406static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3407 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003408{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003409 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003410 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003411 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003412 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003413 int rv;
3414
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 peer_state.i = be32_to_cpu(p->state);
3416
3417 real_peer_disk = peer_state.disk;
3418 if (peer_state.disk == D_NEGOTIATING) {
3419 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3420 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3421 }
3422
Philipp Reisner87eeee42011-01-19 14:16:30 +01003423 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003425 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003426 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003427
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003428 /* peer says his disk is uptodate, while we think it is inconsistent,
3429 * and this happens while we think we have a sync going on. */
3430 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3431 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3432 /* If we are (becoming) SyncSource, but peer is still in sync
3433 * preparation, ignore its uptodate-ness to avoid flapping, it
3434 * will change to inconsistent once the peer reaches active
3435 * syncing states.
3436 * It may have changed syncer-paused flags, however, so we
3437 * cannot ignore this completely. */
3438 if (peer_state.conn > C_CONNECTED &&
3439 peer_state.conn < C_SYNC_SOURCE)
3440 real_peer_disk = D_INCONSISTENT;
3441
3442 /* if peer_state changes to connected at the same time,
3443 * it explicitly notifies us that it finished resync.
3444 * Maybe we should finish it up, too? */
3445 else if (os.conn >= C_SYNC_SOURCE &&
3446 peer_state.conn == C_CONNECTED) {
3447 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3448 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003449 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003450 }
3451 }
3452
3453 /* peer says his disk is inconsistent, while we think it is uptodate,
3454 * and this happens while the peer still thinks we have a sync going on,
3455 * but we think we are already done with the sync.
3456 * We ignore this to avoid flapping pdsk.
3457 * This should not happen, if the peer is a recent version of drbd. */
3458 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3459 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3460 real_peer_disk = D_UP_TO_DATE;
3461
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003462 if (ns.conn == C_WF_REPORT_PARAMS)
3463 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464
Philipp Reisner67531712010-10-27 12:21:30 +02003465 if (peer_state.conn == C_AHEAD)
3466 ns.conn = C_BEHIND;
3467
Philipp Reisnerb411b362009-09-25 16:07:19 -07003468 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3469 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3470 int cr; /* consider resync */
3471
3472 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003473 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003474 /* if we had an established connection
3475 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003476 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003477 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003478 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003479 /* if we have both been inconsistent, and the peer has been
3480 * forced to be UpToDate with --overwrite-data */
3481 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3482 /* if we had been plain connected, and the admin requested to
3483 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003484 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485 (peer_state.conn >= C_STARTING_SYNC_S &&
3486 peer_state.conn <= C_WF_BITMAP_T));
3487
3488 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003489 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003490
3491 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003492 if (ns.conn == C_MASK) {
3493 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003494 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003495 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003496 } else if (peer_state.disk == D_NEGOTIATING) {
3497 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3498 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003499 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003500 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003501 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003502 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003503 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003504 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003505 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003506 }
3507 }
3508 }
3509
Philipp Reisner87eeee42011-01-19 14:16:30 +01003510 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003511 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512 goto retry;
3513 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 ns.peer = peer_state.role;
3515 ns.pdsk = real_peer_disk;
3516 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003517 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003519 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3520 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003521 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003522 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003523 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003524 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003525 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003526 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003527 drbd_uuid_new_current(mdev);
3528 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003529 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003530 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003531 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003532 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003533 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003534 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003535
3536 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003537 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003538 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003539 }
3540
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003541 if (os.conn > C_WF_REPORT_PARAMS) {
3542 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003543 peer_state.disk != D_NEGOTIATING ) {
3544 /* we want resync, peer has not yet decided to sync... */
3545 /* Nowadays only used when forcing a node into primary role and
3546 setting its disk to UpToDate with that */
3547 drbd_send_uuids(mdev);
3548 drbd_send_state(mdev);
3549 }
3550 }
3551
Philipp Reisner89e58e72011-01-19 13:12:45 +01003552 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553
3554 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3555
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003556 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003557}
3558
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003559static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3560 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003561{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003562 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563
3564 wait_event(mdev->misc_wait,
3565 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003566 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567 mdev->state.conn < C_CONNECTED ||
3568 mdev->state.disk < D_NEGOTIATING);
3569
3570 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3571
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572 /* Here the _drbd_uuid_ functions are right, current should
3573 _not_ be rotated into the history */
3574 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3575 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3576 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3577
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003578 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003579 drbd_start_resync(mdev, C_SYNC_TARGET);
3580
3581 put_ldev(mdev);
3582 } else
3583 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3584
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003585 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586}
3587
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003588/**
3589 * receive_bitmap_plain
3590 *
3591 * Return 0 when done, 1 when another iteration is needed, and a negative error
3592 * code upon failure.
3593 */
3594static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003595receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3596 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597{
3598 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3599 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003600 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003601
Philipp Reisner02918be2010-08-20 14:35:10 +02003602 if (want != data_size) {
3603 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003604 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003605 }
3606 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003607 return 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003608 err = drbd_recv_all(mdev->tconn, buffer, want);
3609 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003610 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003611
3612 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3613
3614 c->word_offset += num_words;
3615 c->bit_offset = c->word_offset * BITS_PER_LONG;
3616 if (c->bit_offset > c->bm_bits)
3617 c->bit_offset = c->bm_bits;
3618
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003619 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620}
3621
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003622/**
3623 * recv_bm_rle_bits
3624 *
3625 * Return 0 when done, 1 when another iteration is needed, and a negative error
3626 * code upon failure.
3627 */
3628static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629recv_bm_rle_bits(struct drbd_conf *mdev,
3630 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003631 struct bm_xfer_ctx *c,
3632 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633{
3634 struct bitstream bs;
3635 u64 look_ahead;
3636 u64 rl;
3637 u64 tmp;
3638 unsigned long s = c->bit_offset;
3639 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640 int toggle = DCBP_get_start(p);
3641 int have;
3642 int bits;
3643
3644 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3645
3646 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3647 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003648 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003649
3650 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3651 bits = vli_decode_bits(&rl, look_ahead);
3652 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003653 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654
3655 if (toggle) {
3656 e = s + rl -1;
3657 if (e >= c->bm_bits) {
3658 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003659 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003660 }
3661 _drbd_bm_set_bits(mdev, s, e);
3662 }
3663
3664 if (have < bits) {
3665 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3666 have, bits, look_ahead,
3667 (unsigned int)(bs.cur.b - p->code),
3668 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003669 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003670 }
3671 look_ahead >>= bits;
3672 have -= bits;
3673
3674 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3675 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003676 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003677 look_ahead |= tmp << have;
3678 have += bits;
3679 }
3680
3681 c->bit_offset = s;
3682 bm_xfer_ctx_bit_to_word_offset(c);
3683
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003684 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003685}
3686
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003687/**
3688 * decode_bitmap_c
3689 *
3690 * Return 0 when done, 1 when another iteration is needed, and a negative error
3691 * code upon failure.
3692 */
3693static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694decode_bitmap_c(struct drbd_conf *mdev,
3695 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003696 struct bm_xfer_ctx *c,
3697 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698{
3699 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003700 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003701
3702 /* other variants had been implemented for evaluation,
3703 * but have been dropped as this one turned out to be "best"
3704 * during all our tests. */
3705
3706 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003707 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003708 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709}
3710
3711void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3712 const char *direction, struct bm_xfer_ctx *c)
3713{
3714 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003715 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3717 + c->bm_words * sizeof(long);
3718 unsigned total = c->bytes[0] + c->bytes[1];
3719 unsigned r;
3720
3721 /* total can not be zero. but just in case: */
3722 if (total == 0)
3723 return;
3724
3725 /* don't report if not compressed */
3726 if (total >= plain)
3727 return;
3728
3729 /* total < plain. check for overflow, still */
3730 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3731 : (1000 * total / plain);
3732
3733 if (r > 1000)
3734 r = 1000;
3735
3736 r = 1000 - r;
3737 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3738 "total %u; compression: %u.%u%%\n",
3739 direction,
3740 c->bytes[1], c->packets[1],
3741 c->bytes[0], c->packets[0],
3742 total, r/10, r % 10);
3743}
3744
3745/* Since we are processing the bitfield from lower addresses to higher,
3746 it does not matter if the process it in 32 bit chunks or 64 bit
3747 chunks as long as it is little endian. (Understand it as byte stream,
3748 beginning with the lowest byte...) If we would use big endian
3749 we would need to process it from the highest address to the lowest,
3750 in order to be agnostic to the 32 vs 64 bits issue.
3751
3752 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003753static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3754 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003755{
3756 struct bm_xfer_ctx c;
3757 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003758 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003759 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003760 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003761
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003762 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3763 /* you are supposed to send additional out-of-sync information
3764 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765
3766 /* maybe we should use some per thread scratch page,
3767 * and allocate that during initial device creation? */
3768 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3769 if (!buffer) {
3770 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003771 err = -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772 goto out;
3773 }
3774
3775 c = (struct bm_xfer_ctx) {
3776 .bm_bits = drbd_bm_bits(mdev),
3777 .bm_words = drbd_bm_words(mdev),
3778 };
3779
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003780 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003781 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003782 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003783 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 /* MAYBE: sanity check that we speak proto >= 90,
3785 * and the feature is enabled! */
3786 struct p_compressed_bm *p;
3787
Philipp Reisner02918be2010-08-20 14:35:10 +02003788 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003789 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003790 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791 goto out;
3792 }
3793 /* use the page buff */
3794 p = buffer;
3795 memcpy(p, h, sizeof(*h));
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003796 err = drbd_recv_all(mdev->tconn, p->head.payload, data_size);
3797 if (err)
3798 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003799 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3800 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003801 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003802 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003803 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003804 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003806 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003807 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003808 goto out;
3809 }
3810
Philipp Reisner02918be2010-08-20 14:35:10 +02003811 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003812 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003813
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003814 if (err <= 0) {
3815 if (err < 0)
3816 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003817 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003818 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003819 err = drbd_recv_header(mdev->tconn, &pi);
3820 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003822 cmd = pi.cmd;
3823 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003824 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825
3826 INFO_bm_xfer_stats(mdev, "receive", &c);
3827
3828 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003829 enum drbd_state_rv rv;
3830
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003831 err = drbd_send_bitmap(mdev);
3832 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 goto out;
3834 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003835 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3836 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3838 /* admin may have requested C_DISCONNECTING,
3839 * other threads may have noticed network errors */
3840 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3841 drbd_conn_str(mdev->state.conn));
3842 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003843 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844
Philipp Reisnerb411b362009-09-25 16:07:19 -07003845 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003846 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003847 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848 drbd_start_resync(mdev, C_SYNC_SOURCE);
3849 free_page((unsigned long) buffer);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003850 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003851}
3852
Philipp Reisner2de876e2011-03-15 14:38:01 +01003853static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854{
3855 /* TODO zero copy sink :) */
3856 static char sink[128];
3857 int size, want, r;
3858
Philipp Reisner02918be2010-08-20 14:35:10 +02003859 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003860 while (size > 0) {
3861 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003862 r = drbd_recv(tconn, sink, want);
3863 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003864 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003865 size -= r;
3866 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003867 return size ? -EIO : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003868}
3869
Philipp Reisner2de876e2011-03-15 14:38:01 +01003870static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3871 unsigned int data_size)
3872{
3873 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3874 cmd, data_size);
3875
3876 return _tconn_receive_skip(mdev->tconn, data_size);
3877}
3878
3879static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3880{
3881 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3882 cmd, data_size);
3883
3884 return _tconn_receive_skip(tconn, data_size);
3885}
3886
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003887static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3888 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003889{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890 /* Make sure we've acked all the TCP data associated
3891 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003892 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003894 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003895}
3896
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003897static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3898 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003899{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003900 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003901
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003902 switch (mdev->state.conn) {
3903 case C_WF_SYNC_UUID:
3904 case C_WF_BITMAP_T:
3905 case C_BEHIND:
3906 break;
3907 default:
3908 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3909 drbd_conn_str(mdev->state.conn));
3910 }
3911
Philipp Reisner73a01a12010-10-27 14:33:00 +02003912 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3913
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003914 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003915}
3916
Philipp Reisner02918be2010-08-20 14:35:10 +02003917struct data_cmd {
3918 int expect_payload;
3919 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003920 enum mdev_or_conn fa_type; /* first argument's type */
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003921 union {
3922 int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
3923 unsigned int to_receive);
3924 int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
3925 unsigned int to_receive);
3926 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927};
3928
Philipp Reisner02918be2010-08-20 14:35:10 +02003929static struct data_cmd drbd_cmd_handler[] = {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003930 [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
3931 [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
3932 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
3933 [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
3934 [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3935 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3936 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
3937 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3938 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3939 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3940 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
Philipp Reisner72046242011-03-15 18:51:47 +01003941 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } },
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003942 [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
3943 [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
3944 [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } },
3945 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
3946 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
3947 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3948 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3949 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3950 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
3951 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003952 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } },
Philipp Reisner02918be2010-08-20 14:35:10 +02003953};
3954
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it may not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003961
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003962static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003964 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003965 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003966 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003967 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003969 while (get_t_state(&tconn->receiver) == RUNNING) {
3970 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01003971 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003972 goto err_out;
3973
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003974 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003975 !drbd_cmd_handler[pi.cmd].mdev_fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003976 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003977 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003978 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979
Philipp Reisner77351055b2011-02-07 17:24:26 +01003980 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3981 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003982 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003983 goto err_out;
3984 }
3985
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003986 if (shs) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003987 err = drbd_recv_all(tconn, &header->payload, shs);
3988 if (err) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003989 if (!signal_pending(current))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003990 conn_warn(tconn, "short read while reading sub header: rv=%d\n", err);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003991 goto err_out;
3992 }
3993 }
3994
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003995 if (drbd_cmd_handler[pi.cmd].fa_type == CONN) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003996 err = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003997 } else {
3998 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003999 err = mdev ?
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01004000 drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
4001 tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
4002 }
Philipp Reisner02918be2010-08-20 14:35:10 +02004003
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004004 if (unlikely(err)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004005 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004006 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004007 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004008 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004010 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004012 err_out:
4013 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004014}
4015
Philipp Reisner0e29d162011-02-18 14:23:11 +01004016void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017{
4018 struct drbd_wq_barrier barr;
4019
4020 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004021 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004022 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004023 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 wait_for_completion(&barr.done);
4025}
4026
Philipp Reisner360cc742011-02-08 14:29:53 +01004027static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004029 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004032 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004033 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034
4035 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004036 drbd_thread_stop(&tconn->asender);
4037 drbd_free_sock(tconn);
4038
4039 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
4040
4041 conn_info(tconn, "Connection closed\n");
4042
4043 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004044 oc = tconn->cstate;
4045 if (oc >= C_UNCONNECTED)
4046 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4047
Philipp Reisner360cc742011-02-08 14:29:53 +01004048 spin_unlock_irq(&tconn->req_lock);
4049
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004050 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01004051 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
4052
4053 crypto_free_hash(tconn->cram_hmac_tfm);
4054 tconn->cram_hmac_tfm = NULL;
4055
4056 kfree(tconn->net_conf);
4057 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004058 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004059 }
4060}
4061
4062static int drbd_disconnected(int vnr, void *p, void *data)
4063{
4064 struct drbd_conf *mdev = (struct drbd_conf *)p;
4065 enum drbd_fencing_p fp;
4066 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067
Philipp Reisner85719572010-07-21 10:20:17 +02004068 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004069 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4071 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4072 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004073 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074
4075 /* We do not have data structures that would allow us to
4076 * get the rs_pending_cnt down to 0 again.
4077 * * On C_SYNC_TARGET we do not have any data structures describing
4078 * the pending RSDataRequest's we have sent.
4079 * * On C_SYNC_SOURCE there is no data structure that tracks
4080 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4081 * And no, it is not the sum of the reference counts in the
4082 * resync_LRU. The resync_LRU tracks the whole operation including
4083 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4084 * on the fly. */
4085 drbd_rs_cancel_all(mdev);
4086 mdev->rs_total = 0;
4087 mdev->rs_failed = 0;
4088 atomic_set(&mdev->rs_pending_cnt, 0);
4089 wake_up(&mdev->misc_wait);
4090
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004091 del_timer(&mdev->request_timer);
4092
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004094 resync_timer_fn((unsigned long)mdev);
4095
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4097 * w_make_resync_request etc. which may still be on the worker queue
4098 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004099 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100
4101 /* This also does reclaim_net_ee(). If we do this too early, we might
4102 * miss some resync ee and pages.*/
4103 drbd_process_done_ee(mdev);
4104
4105 kfree(mdev->p_uuid);
4106 mdev->p_uuid = NULL;
4107
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004108 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004109 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004110
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 drbd_md_sync(mdev);
4112
4113 fp = FP_DONT_CARE;
4114 if (get_ldev(mdev)) {
4115 fp = mdev->ldev->dc.fencing;
4116 put_ldev(mdev);
4117 }
4118
Philipp Reisner87f7be42010-06-11 13:56:33 +02004119 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
4120 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004122 /* serialize with bitmap writeout triggered by the state change,
4123 * if any. */
4124 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4125
Philipp Reisnerb411b362009-09-25 16:07:19 -07004126 /* tcp_close and release of sendpage pages can be deferred. I don't
4127 * want to use SO_LINGER, because apparently it can be deferred for
4128 * more than 20 seconds (longest time I checked).
4129 *
4130 * Actually we don't care for exactly when the network stack does its
4131 * put_page(), but release our reference on these pages right here.
4132 */
4133 i = drbd_release_ee(mdev, &mdev->net_ee);
4134 if (i)
4135 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004136 i = atomic_read(&mdev->pp_in_use_by_net);
4137 if (i)
4138 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004139 i = atomic_read(&mdev->pp_in_use);
4140 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004141 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142
4143 D_ASSERT(list_empty(&mdev->read_ee));
4144 D_ASSERT(list_empty(&mdev->active_ee));
4145 D_ASSERT(list_empty(&mdev->sync_ee));
4146 D_ASSERT(list_empty(&mdev->done_ee));
4147
4148 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4149 atomic_set(&mdev->current_epoch->epoch_size, 0);
4150 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004151
4152 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004153}
4154
4155/*
4156 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4157 * we can agree on is stored in agreed_pro_version.
4158 *
4159 * feature flags and the reserved array should be enough room for future
4160 * enhancements of the handshake protocol, and possible plugins...
4161 *
4162 * for now, they are expected to be zero, but ignored.
4163 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004164static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004166 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004167 struct p_handshake *p = &tconn->data.sbuf.handshake;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004168 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004169
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004170 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4171 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004172 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173 }
4174
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004175 if (tconn->data.socket == NULL) {
4176 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004177 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004178 }
4179
4180 memset(p, 0, sizeof(*p));
4181 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4182 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004183 err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004184 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004185 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004186 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004187}
4188
4189/*
4190 * return values:
4191 * 1 yes, we have a valid connection
4192 * 0 oops, did not work out, please try again
4193 * -1 peer talks different language,
4194 * no point in trying again, please go standalone.
4195 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004196static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004197{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004198 /* ASSERT current == tconn->receiver ... */
4199 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004200 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004201 struct packet_info pi;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004202 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004204 err = drbd_send_handshake(tconn);
4205 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004206 return 0;
4207
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004208 err = drbd_recv_header(tconn, &pi);
4209 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210 return 0;
4211
Philipp Reisner77351055b2011-02-07 17:24:26 +01004212 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004213 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004214 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004215 return -1;
4216 }
4217
Philipp Reisner77351055b2011-02-07 17:24:26 +01004218 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004219 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004220 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 return -1;
4222 }
4223
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004224 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004225
4226 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004227 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004228 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229 return 0;
4230 }
4231
Philipp Reisnerb411b362009-09-25 16:07:19 -07004232 p->protocol_min = be32_to_cpu(p->protocol_min);
4233 p->protocol_max = be32_to_cpu(p->protocol_max);
4234 if (p->protocol_max == 0)
4235 p->protocol_max = p->protocol_min;
4236
4237 if (PRO_VERSION_MAX < p->protocol_min ||
4238 PRO_VERSION_MIN > p->protocol_max)
4239 goto incompat;
4240
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004241 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004243 conn_info(tconn, "Handshake successful: "
4244 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245
4246 return 1;
4247
4248 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004249 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004250 "I support %d-%d, peer supports %d-%d\n",
4251 PRO_VERSION_MIN, PRO_VERSION_MAX,
4252 p->protocol_min, p->protocol_max);
4253 return -1;
4254}
4255
#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Variant for kernels without HMAC support: peer authentication cannot
 * work, so complain and report a permanent failure (-1).
 *
 * Only the connection is known here, so log through conn_err() like the
 * other tconn-level functions in this file.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/*
 * drbd_do_auth() - mutual challenge/response authentication with the peer
 * @tconn:	connection to authenticate
 *
 * Keys tconn->cram_hmac_tfm with the shared secret, sends a random
 * challenge, answers the peer's challenge with the digest over it, and
 * checks that the peer's response equals the digest over our own
 * challenge.
 *
 * Return value:
 *	1 - auth succeeded,
 *	0 - failed, try again (network error),
 *	-1 - auth failed, don't try again.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* A non-zero return means the send failed: worth a retry. */
	if (conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN)) {
		rv = 0;
		goto fail;
	}

	if (drbd_recv_header(tconn, &pi)) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* The size comes from the peer; bound it before allocating. */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%d\n", rv);
		rv = 0;
		goto fail;
	}

	/*
	 * Refuse a peer that merely echoes our own challenge: it could
	 * take the response we compute for "its" challenge and send it
	 * straight back as a valid answer to ours, without knowing the
	 * shared secret.
	 */
	if (pi.size == CHALLENGE_LEN &&
	    !memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		conn_err(tconn, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	/* Our answer: the digest over the peer's challenge. */
	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size)) {
		rv = 0;
		goto fail;
	}

	if (drbd_recv_header(tconn, &pi)) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* The response buffer is reused for the peer's answer. */
	rv = drbd_recv(tconn, response, resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%d\n", rv);
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	/* What the peer should have sent: the digest over our challenge. */
	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
			  resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so every exit path may come through here. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif
4419
4420int drbdd_init(struct drbd_thread *thi)
4421{
Philipp Reisner392c8802011-02-09 10:33:31 +01004422 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423 int h;
4424
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004425 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426
4427 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004428 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004430 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004431 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 }
4433 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004434 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004435 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436 }
4437 } while (h == 0);
4438
4439 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004440 if (get_net_conf(tconn)) {
4441 drbdd(tconn);
4442 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443 }
4444 }
4445
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004446 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004448 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449 return 0;
4450}
4451
4452/* ********* acknowledge sender ******** */
4453
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004454static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd)
4455{
4456 struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply;
4457 int retcode = be32_to_cpu(p->retcode);
4458
4459 if (retcode >= SS_SUCCESS) {
4460 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4461 } else {
4462 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4463 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4464 drbd_set_st_err_str(retcode), retcode);
4465 }
4466 wake_up(&tconn->ping_wait);
4467
4468 return true;
4469}
4470
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004471static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004472{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004473 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004474 int retcode = be32_to_cpu(p->retcode);
4475
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004476 if (retcode >= SS_SUCCESS) {
4477 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4478 } else {
4479 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4480 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4481 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004483 wake_up(&mdev->state_wait);
4484
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004485 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004486}
4487
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004488static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004489{
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004490 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004491
4492}
4493
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004494static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004495{
4496 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004497 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4498 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4499 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004500
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004501 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502}
4503
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004504static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004505{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004506 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507 sector_t sector = be64_to_cpu(p->sector);
4508 int blksize = be32_to_cpu(p->blksize);
4509
Philipp Reisner31890f42011-01-19 14:12:51 +01004510 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511
4512 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4513
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004514 if (get_ldev(mdev)) {
4515 drbd_rs_complete_io(mdev, sector);
4516 drbd_set_in_sync(mdev, sector, blksize);
4517 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4518 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4519 put_ldev(mdev);
4520 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004522 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004524 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004525}
4526
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004527static int
4528validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4529 struct rb_root *root, const char *func,
4530 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531{
4532 struct drbd_request *req;
4533 struct bio_and_error m;
4534
Philipp Reisner87eeee42011-01-19 14:16:30 +01004535 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004536 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004538 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004539 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540 }
4541 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004542 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543
4544 if (m.bio)
4545 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004546 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547}
4548
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004549static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004550{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004551 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552 sector_t sector = be64_to_cpu(p->sector);
4553 int blksize = be32_to_cpu(p->blksize);
4554 enum drbd_req_event what;
4555
4556 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4557
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004558 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004559 drbd_set_in_sync(mdev, sector, blksize);
4560 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004561 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004562 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004563 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004564 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004565 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004566 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004567 break;
4568 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004569 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004570 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004571 break;
4572 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004573 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004574 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004576 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004577 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004578 what = DISCARD_WRITE;
4579 break;
4580 case P_RETRY_WRITE:
4581 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4582 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583 break;
4584 default:
4585 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004586 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 }
4588
4589 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004590 &mdev->write_requests, __func__,
4591 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592}
4593
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004594static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004596 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004598 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004599 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4600 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004601 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602
4603 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4604
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004605 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004606 dec_rs_pending(mdev);
4607 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004608 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004609 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004610
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004611 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004612 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004613 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004614 if (!found) {
4615 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4616 The master bio might already be completed, therefore the
4617 request is no longer in the collision hash. */
4618 /* In Protocol B we might already have got a P_RECV_ACK
4619 but then get a P_NEG_ACK afterwards. */
4620 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004621 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004622 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004623 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004624 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625}
4626
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004627static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004629 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630 sector_t sector = be64_to_cpu(p->sector);
4631
4632 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004633
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4635 (unsigned long long)sector, be32_to_cpu(p->blksize));
4636
4637 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004638 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004639 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640}
4641
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004642static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643{
4644 sector_t sector;
4645 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004646 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004647
4648 sector = be64_to_cpu(p->sector);
4649 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650
4651 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4652
4653 dec_rs_pending(mdev);
4654
4655 if (get_ldev_if_state(mdev, D_FAILED)) {
4656 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004657 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004658 case P_NEG_RS_DREPLY:
4659 drbd_rs_failed_io(mdev, sector, size);
4660 case P_RS_CANCEL:
4661 break;
4662 default:
4663 D_ASSERT(0);
4664 put_ldev(mdev);
4665 return false;
4666 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 put_ldev(mdev);
4668 }
4669
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004670 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004671}
4672
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004673static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004675 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004677 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004678
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004679 if (mdev->state.conn == C_AHEAD &&
4680 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004681 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4682 mdev->start_resync_timer.expires = jiffies + HZ;
4683 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004684 }
4685
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004686 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687}
4688
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004689static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004690{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004691 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 struct drbd_work *w;
4693 sector_t sector;
4694 int size;
4695
4696 sector = be64_to_cpu(p->sector);
4697 size = be32_to_cpu(p->blksize);
4698
4699 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4700
4701 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4702 drbd_ov_oos_found(mdev, sector, size);
4703 else
4704 ov_oos_print(mdev);
4705
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004706 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004707 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004708
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709 drbd_rs_complete_io(mdev, sector);
4710 dec_rs_pending(mdev);
4711
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004712 --mdev->ov_left;
4713
4714 /* let's advance progress step marks only for every other megabyte */
4715 if ((mdev->ov_left & 0x200) == 0x200)
4716 drbd_advance_rs_marks(mdev, mdev->ov_left);
4717
4718 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719 w = kmalloc(sizeof(*w), GFP_NOIO);
4720 if (w) {
4721 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004722 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004723 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004724 } else {
4725 dev_err(DEV, "kmalloc(w) failed.");
4726 ov_oos_print(mdev);
4727 drbd_resync_finished(mdev);
4728 }
4729 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004730 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004731 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732}
4733
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004734static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004735{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004736 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004737}
4738
Philipp Reisner32862ec2011-02-08 16:41:01 +01004739static int tconn_process_done_ee(struct drbd_tconn *tconn)
4740{
Philipp Reisner082a3432011-03-15 16:05:42 +01004741 struct drbd_conf *mdev;
4742 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004743
4744 do {
4745 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4746 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004747 idr_for_each_entry(&tconn->volumes, mdev, i) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +01004748 if (drbd_process_done_ee(mdev))
Philipp Reisner082a3432011-03-15 16:05:42 +01004749 return 1; /* error */
4750 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004751 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004752
4753 spin_lock_irq(&tconn->req_lock);
4754 idr_for_each_entry(&tconn->volumes, mdev, i) {
4755 not_empty = !list_empty(&mdev->done_ee);
4756 if (not_empty)
4757 break;
4758 }
4759 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004760 } while (not_empty);
4761
4762 return 0;
4763}
4764
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004765struct asender_cmd {
4766 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004767 enum mdev_or_conn fa_type; /* first argument's type */
4768 union {
4769 int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd);
4770 int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd);
4771 };
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004772};
4773
4774static struct asender_cmd asender_tbl[] = {
Philipp Reisnerf19e4f82011-03-16 11:21:50 +01004775 [P_PING] = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } },
4776 [P_PING_ACK] = { sizeof(struct p_header), CONN, { .conn_fn = got_PingAck } },
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004777 [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4778 [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4779 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4780 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4781 [P_NEG_ACK] = { sizeof(struct p_block_ack), MDEV, { got_NegAck } },
4782 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } },
4783 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
4784 [P_OV_RESULT] = { sizeof(struct p_block_ack), MDEV, { got_OVResult } },
4785 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } },
4786 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
4787 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } },
4788 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } },
4789 [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004790 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}},
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004791 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004792};
4793
Philipp Reisnerb411b362009-09-25 16:07:19 -07004794int drbd_asender(struct drbd_thread *thi)
4795{
Philipp Reisner392c8802011-02-09 10:33:31 +01004796 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004797 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004798 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004799 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004800 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004801 void *buf = h;
4802 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004803 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004804 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004805
Philipp Reisnerb411b362009-09-25 16:07:19 -07004806 current->policy = SCHED_RR; /* Make this a realtime task! */
4807 current->rt_priority = 2; /* more important than all other tasks */
4808
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004809 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004810 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004811 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004812 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004813 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004814 goto reconnect;
4815 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004816 tconn->meta.socket->sk->sk_rcvtimeo =
4817 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004818 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004819 }
4820
Philipp Reisner32862ec2011-02-08 16:41:01 +01004821 /* TODO: conditionally cork; it may hurt latency if we cork without
4822 much to send */
4823 if (!tconn->net_conf->no_cork)
4824 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004825 if (tconn_process_done_ee(tconn)) {
4826 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004827 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004828 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004829 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004830 if (!tconn->net_conf->no_cork)
4831 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004832
4833 /* short circuit, recv_msg would return EINTR anyways. */
4834 if (signal_pending(current))
4835 continue;
4836
Philipp Reisner32862ec2011-02-08 16:41:01 +01004837 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4838 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839
4840 flush_signals(current);
4841
4842 /* Note:
4843 * -EINTR (on meta) we got a signal
4844 * -EAGAIN (on meta) rcvtimeo expired
4845 * -ECONNRESET other side closed the connection
4846 * -ERESTARTSYS (on data) we got a signal
4847 * rv < 0 other than above: unexpected error!
4848 * rv == expected: full header or command
4849 * rv < expected: "woken" by signal during receive
4850 * rv == 0 : "connection shut down by peer"
4851 */
4852 if (likely(rv > 0)) {
4853 received += rv;
4854 buf += rv;
4855 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004856 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004857 goto reconnect;
4858 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004859 /* If the data socket received something meanwhile,
4860 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004861 if (time_after(tconn->last_received,
4862 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004863 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004864 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004865 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004866 goto reconnect;
4867 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004868 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004869 continue;
4870 } else if (rv == -EINTR) {
4871 continue;
4872 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004873 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004874 goto reconnect;
4875 }
4876
4877 if (received == expect && cmd == NULL) {
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01004878 if (decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004879 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004880 cmd = &asender_tbl[pi.cmd];
4881 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004882 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004883 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004884 goto disconnect;
4885 }
4886 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004887 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004888 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004889 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004890 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004891 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004892 }
4893 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004894 bool rv;
4895
4896 if (cmd->fa_type == CONN) {
4897 rv = cmd->conn_fn(tconn, pi.cmd);
4898 } else {
4899 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
4900 rv = cmd->mdev_fn(mdev, pi.cmd);
4901 }
4902
4903 if (!rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004904 goto reconnect;
4905
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004906 tconn->last_received = jiffies;
4907
Lars Ellenbergf36af182011-03-09 22:44:55 +01004908 /* the idle_timeout (ping-int)
4909 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004910 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004911 ping_timeout_active = 0;
4912
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913 buf = h;
4914 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004915 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004916 cmd = NULL;
4917 }
4918 }
4919
4920 if (0) {
4921reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004922 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004923 }
4924 if (0) {
4925disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004926 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004927 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004928 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004929
Philipp Reisner32862ec2011-02-08 16:41:01 +01004930 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004931
4932 return 0;
4933}